diff --git a/Cargo.lock b/Cargo.lock index 7c719e5..1cc187e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,13 +106,24 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "font-explorer" version = "0.1.0" dependencies = [ "bincode", "clap", + "log", "serde", + "simplelog", ] [[package]] @@ -127,6 +138,45 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "libc" +version = "0.2.170" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" + +[[package]] +name = "log" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "proc-macro2" version = "1.0.89" @@ -165,6 +215,17 @@ dependencies = [ "syn", ] +[[package]] +name = "simplelog" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0" +dependencies = [ + "log", + "termcolor", + "time", +] + [[package]] name = "strsim" version = "0.11.1" @@ -182,6 +243,48 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "time" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "unicode-ident" version = "1.0.13" @@ -194,6 +297,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index 71c8456..bd3bf53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,6 @@ edition = "2021" [dependencies] bincode = "1.3.3" clap = { version = "4.5.21", features = ["derive"] } +log = "0.4.26" serde = { version = "1.0.215", features = ["derive", "serde_derive"] } +simplelog = { version = "0.12.2", features = ["termcolor"] } diff --git a/src/font.rs b/src/font.rs index 0544697..ae32898 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,29 +1,56 @@ mod table_directory; +mod character_map; +mod search; -use std::io::{BufReader, Read, Seek}; +use std::{fs::File, io::{BufReader, Read}}; -use bincode::{Options, Result}; +use bincode::Options; +use character_map::CharacterMap; +use log::{debug, info}; use table_directory::TableDirectory; -fn deserialize(reader: R) -> Result { +fn deserialize(reader: R) -> bincode::Result { bincode::options().with_big_endian().with_fixint_encoding().deserialize_from::(reader) } #[derive(Debug)] -pub struct Font { - data_source: BufReader, - table_directory: TableDirectory +pub struct Font { + data_source: BufReader, + table_directory: TableDirectory, + character_map: CharacterMap } -impl Font { - pub fn new(reader: R) -> Result { - let mut buf_reader = BufReader::new(reader); +impl Font { + pub fn new(file: File) -> bincode::Result { + let mut buf_reader = BufReader::new(file.try_clone()?); let table_directory = TableDirectory::new(&mut buf_reader)?; + let Some(cmap) = table_directory.get_table("cmap") else { + return Err(Box::new(bincode::ErrorKind::Custom("Missing table 'cmap'".into()))); + }; + + let character_map = CharacterMap::new(file.try_clone()?, cmap)?; + debug!("{character_map:#?}"); + Ok(Font { data_source: buf_reader, - table_directory: table_directory + table_directory, + character_map }) } + + pub fn get_data_for(&mut self, glyph: char) -> Result<(), &'static str> { + let Some(table) = self.table_directory.get_table("glyf") else { + return Err("Failed to get table. It doesn't exist"); + }; + + let Some(glyph_id) = self.character_map.get_char(glyph) else { + return Err("Failed to get glyph id for char"); + }; + + info!("glyph id for char '{glyph}' is {glyph_id}"); + + Ok(()) + } } diff --git a/src/font/character_map.rs b/src/font/character_map.rs new file mode 100644 index 0000000..8ce4a69 --- /dev/null +++ b/src/font/character_map.rs @@ -0,0 +1,161 @@ +use std::{fs::File, io::{BufReader, Read, Seek, SeekFrom}}; + +use bincode::{ErrorKind, Result}; +use log::{debug, error, warn}; +use serde::{de::DeserializeOwned, Deserialize}; + +use super::{deserialize, table_directory::TableDirectoryRecord, search::SearchParameters}; + +#[derive(Debug, Deserialize, Copy, Clone)] +struct SegmentToDeltaHeader { + _length: u16, + _language: u16, +} + +#[derive(Debug)] +struct SegmentToDelta { + data_source: BufReader, + + header: SegmentToDeltaHeader, + search_params: SearchParameters, + + end_code: Vec, + start_code: Vec, + id_delta: Vec, + id_range_offset: Vec, + + glyph_table: u32 +} + +impl SegmentToDelta { + fn get_next_vec(reader: &mut R, seg_count: u16) -> Result> { + let mut values: Vec = vec!(); + for _ in 0..seg_count { + values.push(deserialize(reader.by_ref())?); + } + + Ok(values) + } + + fn new(reader: &mut R, file: File) -> Result { + let header: SegmentToDeltaHeader = deserialize(reader.by_ref())?; + let search_params: SearchParameters = deserialize(reader.by_ref())?; + + let end_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?; + reader.seek_relative(2)?; + let start_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?; + let id_delta = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?; + let id_range_offset = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?; + + Ok(SegmentToDelta { + data_source: BufReader::new(file), + glyph_table: reader.stream_position()? as u32, + header, + search_params, + end_code, + start_code, + id_delta, + id_range_offset + }) + } + + fn get_glyph_id(&mut self, c: u16) -> Option { + // Find index of first endcode that is greater or equal to c + let index =self.search_params.find_first_goe(self.end_code.as_slice(), c)?; + debug!("First endcode that is >={c} has index {index} (={})", self.end_code[index]); + + if self.start_code[index] > c { + return None + } + + let id_range_offset = self.id_range_offset[index]; + debug!("range offset = {id_range_offset}"); + + if id_range_offset == 0 { + debug!("delta = {}", self.id_delta[index]); + return Some((c as i16 + self.id_delta[index]) as u16 % u16::MAX); + } + + let glyph_map_offset = ((id_range_offset >> 1) as usize) + ((c - self.start_code[index]) as usize) - index; + if let Err(error) = self.data_source.seek(SeekFrom::Start(self.glyph_table as u64 + glyph_map_offset as u64)) { + error!("{error}"); + return None; + } + + if let Ok(glyph) = deserialize::<_, u16>(self.data_source.by_ref()) { + return Some(glyph); + } + + return None; + } +} + +#[derive(Debug)] +pub struct CharacterMap { + _table_start: u32, + _length: u32, + + mapping_table: SegmentToDelta +} + +#[derive(Debug, Deserialize)] +struct EncodingRecord { + platform: u16, + encoding: u16, + offset: u32, +} + +impl CharacterMap { + fn find_unicode_table(reader: &mut R, num_tables: u16) -> Option { + for _ in 0..num_tables { + let Ok(table) = deserialize::<_, EncodingRecord>(reader.by_ref()) else { + return None; + }; + + if table.platform == 0 && table.encoding == 3 { + return Some(table); + } + } + + None + } + + pub fn new(file: File, cmap: TableDirectoryRecord) -> Result { + let mut reader = BufReader::new(file.try_clone()?); + reader.seek(SeekFrom::Start(cmap.offset as u64))?; + + reader.seek_relative(2)?; // Skip version because i dont care + let num_tables: u16 = deserialize(reader.by_ref())?; + + let Some(table) = Self::find_unicode_table(&mut reader, num_tables) else { + return Err(Box::new(ErrorKind::Custom("No encoding for Unicode 2.0 BMP".into()))); + }; + + reader.seek(SeekFrom::Start((cmap.offset + table.offset) as u64))?; + let format: u16 = deserialize(reader.by_ref())?; + + if format != 4 { + todo!(); + } + + Ok(CharacterMap { + mapping_table: SegmentToDelta::new(reader.by_ref(), file)?, + + _table_start: cmap.offset, + _length: cmap.length, + + }) + } + + pub fn get_char(&mut self, c: char) -> Option { + let code = c as u32; + if code > (u16::MAX as u32) { + warn!("Codepoint {code} for '{c}' is not a valid codepoint"); + return None; + } + + let code = (code & 0xFFFF) as u16; + debug!("Getting glyph id for codepoint {code}"); + return self.mapping_table.get_glyph_id(code); + } +} diff --git a/src/font/search.rs b/src/font/search.rs new file mode 100644 index 0000000..0d22f3e --- /dev/null +++ b/src/font/search.rs @@ -0,0 +1,46 @@ +use std::fmt::Debug; + +use log::debug; +use serde::Deserialize; + +#[derive(Debug, Copy, Clone, Deserialize, Default)] +pub struct SearchParameters { + pub seg_count_x2: u16, // num of elements x2 + pub search_range: u16, // largest power of two smaller than num of elements + pub entry_selector: u16, // max levels in binary search + pub _range_shift: u16 // remaining elements in array +} + +impl SearchParameters { + fn binary_search(&self, array: &[u16], target: u16) -> Option { + let mut index = (self.search_range >> 2) as usize; + for level in 1..=self.entry_selector { + let end_code = array[index]; + + if end_code == target { + return Some(index); + } + + if end_code < target { + index += (self.search_range >> (level + 2)) as usize; + } else { + index -= (self.search_range >> (level + 2)) as usize; + } + } + + Some(index + 1) + } + + pub fn find_first_goe(&self, array: &[u16], target: u16) -> Option { + if target > array[(self.seg_count_x2 as usize >> 1) - 1] { + return None; + } + + if target > array[(self.search_range as usize >> 1) - 1] { + debug!("{}", array[(self.search_range as usize >> 1) - 1]); + todo!() + } + + self.binary_search(array, target) + } +} diff --git a/src/font/table_directory.rs b/src/font/table_directory.rs index f213956..68eb82f 100644 --- a/src/font/table_directory.rs +++ b/src/font/table_directory.rs @@ -11,8 +11,8 @@ struct TableDirectoryHeader { num_tables: u16, } -#[derive(Deserialize, Debug)] -struct TableDirectoryRecord { +#[derive(Deserialize, Debug, Copy, Clone)] +pub struct TableDirectoryRecord { pub table_tag: u32, pub checksum: u32, pub offset: u32, @@ -36,4 +36,13 @@ impl TableDirectory { Ok(TableDirectory(tables)) } + + pub fn get_table(&self, table: &str) -> Option { + let table_id = table.as_bytes().first_chunk::<4>().unwrap().iter().fold(0u32, |running, &val| { + (running << 8) | (val as u32) + }); + + self.0.get(&table_id).copied() + } + } diff --git a/src/main.rs b/src/main.rs index 0bf4425..8aee89c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,24 +1,45 @@ mod font; -use std::{fs::File, io::Result}; +use std::{error::Error, fs::File}; use clap::Parser; use font::Font; +use log::{error, LevelFilter}; +use simplelog::{ColorChoice, Config, TermLogger, TerminalMode}; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] struct Args { - file: String + file: String, + glyph: char } -fn main() -> Result<()> { +fn main() -> Result<(), Box> { + TermLogger::init( + LevelFilter::Debug, + Config::default(), + TerminalMode::Mixed, + ColorChoice::Auto + )?; + let args = Args::parse(); + let font = Font::new(File::open(args.file)?); - let Ok(font) = font else { - panic!("{}", font.unwrap_err().to_string()); + let Ok(mut font) = font else { + let err = font.unwrap_err().to_string(); + error!("{err}"); + return Err(String::from(err).into()); }; - dbg!(font); + // dbg!(font); + + match font.get_data_for(args.glyph) { + Ok(()) => {}, + Err(err) => { + error!("{err}"); + return Err(String::from(err).into()); + } + }; Ok(()) }