add glyph id mapping
This commit is contained in:
parent
4b1d8718d6
commit
db07f3772b
112
Cargo.lock
generated
112
Cargo.lock
generated
|
@ -106,13 +106,24 @@ version = "1.0.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.3.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
|
||||
dependencies = [
|
||||
"powerfmt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "font-explorer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"clap",
|
||||
"log",
|
||||
"serde",
|
||||
"simplelog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -127,6 +138,45 @@ version = "1.70.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.170"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
|
||||
|
||||
[[package]]
|
||||
name = "num-conv"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
||||
|
||||
[[package]]
|
||||
name = "num_threads"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "powerfmt"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.89"
|
||||
|
@ -165,6 +215,17 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simplelog"
|
||||
version = "0.12.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0"
|
||||
dependencies = [
|
||||
"log",
|
||||
"termcolor",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
|
@ -182,6 +243,48 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
|
||||
dependencies = [
|
||||
"deranged",
|
||||
"itoa",
|
||||
"libc",
|
||||
"num-conv",
|
||||
"num_threads",
|
||||
"powerfmt",
|
||||
"serde",
|
||||
"time-core",
|
||||
"time-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time-core"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
|
||||
|
||||
[[package]]
|
||||
name = "time-macros"
|
||||
version = "0.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
|
||||
dependencies = [
|
||||
"num-conv",
|
||||
"time-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.13"
|
||||
|
@ -194,6 +297,15 @@ version = "0.2.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
|
|
|
@ -6,4 +6,6 @@ edition = "2021"
|
|||
[dependencies]
|
||||
bincode = "1.3.3"
|
||||
clap = { version = "4.5.21", features = ["derive"] }
|
||||
log = "0.4.26"
|
||||
serde = { version = "1.0.215", features = ["derive", "serde_derive"] }
|
||||
simplelog = { version = "0.12.2", features = ["termcolor"] }
|
||||
|
|
47
src/font.rs
47
src/font.rs
|
@ -1,29 +1,56 @@
|
|||
mod table_directory;
|
||||
mod character_map;
|
||||
mod search;
|
||||
|
||||
use std::io::{BufReader, Read, Seek};
|
||||
use std::{fs::File, io::{BufReader, Read}};
|
||||
|
||||
use bincode::{Options, Result};
|
||||
use bincode::Options;
|
||||
use character_map::CharacterMap;
|
||||
use log::{debug, info};
|
||||
use table_directory::TableDirectory;
|
||||
|
||||
fn deserialize<R: Read + Seek, T: serde::de::DeserializeOwned>(reader: R) -> Result<T> {
|
||||
fn deserialize<R: Read, T: serde::de::DeserializeOwned>(reader: R) -> bincode::Result<T> {
|
||||
bincode::options().with_big_endian().with_fixint_encoding().deserialize_from::<R, T>(reader)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Font<R: Read + Seek> {
|
||||
data_source: BufReader<R>,
|
||||
table_directory: TableDirectory
|
||||
pub struct Font {
|
||||
data_source: BufReader<File>,
|
||||
table_directory: TableDirectory,
|
||||
character_map: CharacterMap
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> Font<R> {
|
||||
pub fn new(reader: R) -> Result<Self> {
|
||||
let mut buf_reader = BufReader::new(reader);
|
||||
impl Font {
|
||||
pub fn new(file: File) -> bincode::Result<Self> {
|
||||
let mut buf_reader = BufReader::new(file.try_clone()?);
|
||||
let table_directory = TableDirectory::new(&mut buf_reader)?;
|
||||
|
||||
let Some(cmap) = table_directory.get_table("cmap") else {
|
||||
return Err(Box::new(bincode::ErrorKind::Custom("Missing table 'cmap'".into())));
|
||||
};
|
||||
|
||||
let character_map = CharacterMap::new(file.try_clone()?, cmap)?;
|
||||
debug!("{character_map:#?}");
|
||||
|
||||
Ok(Font {
|
||||
data_source: buf_reader,
|
||||
table_directory: table_directory
|
||||
table_directory,
|
||||
character_map
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_data_for(&mut self, glyph: char) -> Result<(), &'static str> {
|
||||
let Some(table) = self.table_directory.get_table("glyf") else {
|
||||
return Err("Failed to get table. It doesn't exist");
|
||||
};
|
||||
|
||||
let Some(glyph_id) = self.character_map.get_char(glyph) else {
|
||||
return Err("Failed to get glyph id for char");
|
||||
};
|
||||
|
||||
info!("glyph id for char '{glyph}' is {glyph_id}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
161
src/font/character_map.rs
Normal file
161
src/font/character_map.rs
Normal file
|
@ -0,0 +1,161 @@
|
|||
use std::{fs::File, io::{BufReader, Read, Seek, SeekFrom}};
|
||||
|
||||
use bincode::{ErrorKind, Result};
|
||||
use log::{debug, error, warn};
|
||||
use serde::{de::DeserializeOwned, Deserialize};
|
||||
|
||||
use super::{deserialize, table_directory::TableDirectoryRecord, search::SearchParameters};
|
||||
|
||||
#[derive(Debug, Deserialize, Copy, Clone)]
|
||||
struct SegmentToDeltaHeader {
|
||||
_length: u16,
|
||||
_language: u16,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SegmentToDelta {
|
||||
data_source: BufReader<File>,
|
||||
|
||||
header: SegmentToDeltaHeader,
|
||||
search_params: SearchParameters,
|
||||
|
||||
end_code: Vec<u16>,
|
||||
start_code: Vec<u16>,
|
||||
id_delta: Vec<i16>,
|
||||
id_range_offset: Vec<u16>,
|
||||
|
||||
glyph_table: u32
|
||||
}
|
||||
|
||||
impl SegmentToDelta {
|
||||
fn get_next_vec<R: Read + Seek, T: DeserializeOwned>(reader: &mut R, seg_count: u16) -> Result<Vec<T>> {
|
||||
let mut values: Vec<T> = vec!();
|
||||
for _ in 0..seg_count {
|
||||
values.push(deserialize(reader.by_ref())?);
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
fn new<R: Read + Seek>(reader: &mut R, file: File) -> Result<SegmentToDelta> {
|
||||
let header: SegmentToDeltaHeader = deserialize(reader.by_ref())?;
|
||||
let search_params: SearchParameters = deserialize(reader.by_ref())?;
|
||||
|
||||
let end_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
|
||||
reader.seek_relative(2)?;
|
||||
let start_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
|
||||
let id_delta = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
|
||||
let id_range_offset = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
|
||||
|
||||
Ok(SegmentToDelta {
|
||||
data_source: BufReader::new(file),
|
||||
glyph_table: reader.stream_position()? as u32,
|
||||
header,
|
||||
search_params,
|
||||
end_code,
|
||||
start_code,
|
||||
id_delta,
|
||||
id_range_offset
|
||||
})
|
||||
}
|
||||
|
||||
fn get_glyph_id(&mut self, c: u16) -> Option<u16> {
|
||||
// Find index of first endcode that is greater or equal to c
|
||||
let index =self.search_params.find_first_goe(self.end_code.as_slice(), c)?;
|
||||
debug!("First endcode that is >={c} has index {index} (={})", self.end_code[index]);
|
||||
|
||||
if self.start_code[index] > c {
|
||||
return None
|
||||
}
|
||||
|
||||
let id_range_offset = self.id_range_offset[index];
|
||||
debug!("range offset = {id_range_offset}");
|
||||
|
||||
if id_range_offset == 0 {
|
||||
debug!("delta = {}", self.id_delta[index]);
|
||||
return Some((c as i16 + self.id_delta[index]) as u16 % u16::MAX);
|
||||
}
|
||||
|
||||
let glyph_map_offset = ((id_range_offset >> 1) as usize) + ((c - self.start_code[index]) as usize) - index;
|
||||
if let Err(error) = self.data_source.seek(SeekFrom::Start(self.glyph_table as u64 + glyph_map_offset as u64)) {
|
||||
error!("{error}");
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Ok(glyph) = deserialize::<_, u16>(self.data_source.by_ref()) {
|
||||
return Some(glyph);
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CharacterMap {
|
||||
_table_start: u32,
|
||||
_length: u32,
|
||||
|
||||
mapping_table: SegmentToDelta
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct EncodingRecord {
|
||||
platform: u16,
|
||||
encoding: u16,
|
||||
offset: u32,
|
||||
}
|
||||
|
||||
impl CharacterMap {
|
||||
fn find_unicode_table<R: Read + Seek>(reader: &mut R, num_tables: u16) -> Option<EncodingRecord> {
|
||||
for _ in 0..num_tables {
|
||||
let Ok(table) = deserialize::<_, EncodingRecord>(reader.by_ref()) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
if table.platform == 0 && table.encoding == 3 {
|
||||
return Some(table);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn new(file: File, cmap: TableDirectoryRecord) -> Result<CharacterMap> {
|
||||
let mut reader = BufReader::new(file.try_clone()?);
|
||||
reader.seek(SeekFrom::Start(cmap.offset as u64))?;
|
||||
|
||||
reader.seek_relative(2)?; // Skip version because i dont care
|
||||
let num_tables: u16 = deserialize(reader.by_ref())?;
|
||||
|
||||
let Some(table) = Self::find_unicode_table(&mut reader, num_tables) else {
|
||||
return Err(Box::new(ErrorKind::Custom("No encoding for Unicode 2.0 BMP".into())));
|
||||
};
|
||||
|
||||
reader.seek(SeekFrom::Start((cmap.offset + table.offset) as u64))?;
|
||||
let format: u16 = deserialize(reader.by_ref())?;
|
||||
|
||||
if format != 4 {
|
||||
todo!();
|
||||
}
|
||||
|
||||
Ok(CharacterMap {
|
||||
mapping_table: SegmentToDelta::new(reader.by_ref(), file)?,
|
||||
|
||||
_table_start: cmap.offset,
|
||||
_length: cmap.length,
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_char(&mut self, c: char) -> Option<u16> {
|
||||
let code = c as u32;
|
||||
if code > (u16::MAX as u32) {
|
||||
warn!("Codepoint {code} for '{c}' is not a valid codepoint");
|
||||
return None;
|
||||
}
|
||||
|
||||
let code = (code & 0xFFFF) as u16;
|
||||
debug!("Getting glyph id for codepoint {code}");
|
||||
return self.mapping_table.get_glyph_id(code);
|
||||
}
|
||||
}
|
46
src/font/search.rs
Normal file
46
src/font/search.rs
Normal file
|
@ -0,0 +1,46 @@
|
|||
use std::fmt::Debug;
|
||||
|
||||
use log::debug;
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Deserialize, Default)]
|
||||
pub struct SearchParameters {
|
||||
pub seg_count_x2: u16, // num of elements x2
|
||||
pub search_range: u16, // largest power of two smaller than num of elements
|
||||
pub entry_selector: u16, // max levels in binary search
|
||||
pub _range_shift: u16 // remaining elements in array
|
||||
}
|
||||
|
||||
impl SearchParameters {
|
||||
fn binary_search(&self, array: &[u16], target: u16) -> Option<usize> {
|
||||
let mut index = (self.search_range >> 2) as usize;
|
||||
for level in 1..=self.entry_selector {
|
||||
let end_code = array[index];
|
||||
|
||||
if end_code == target {
|
||||
return Some(index);
|
||||
}
|
||||
|
||||
if end_code < target {
|
||||
index += (self.search_range >> (level + 2)) as usize;
|
||||
} else {
|
||||
index -= (self.search_range >> (level + 2)) as usize;
|
||||
}
|
||||
}
|
||||
|
||||
Some(index + 1)
|
||||
}
|
||||
|
||||
pub fn find_first_goe(&self, array: &[u16], target: u16) -> Option<usize> {
|
||||
if target > array[(self.seg_count_x2 as usize >> 1) - 1] {
|
||||
return None;
|
||||
}
|
||||
|
||||
if target > array[(self.search_range as usize >> 1) - 1] {
|
||||
debug!("{}", array[(self.search_range as usize >> 1) - 1]);
|
||||
todo!()
|
||||
}
|
||||
|
||||
self.binary_search(array, target)
|
||||
}
|
||||
}
|
|
@ -11,8 +11,8 @@ struct TableDirectoryHeader {
|
|||
num_tables: u16,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct TableDirectoryRecord {
|
||||
#[derive(Deserialize, Debug, Copy, Clone)]
|
||||
pub struct TableDirectoryRecord {
|
||||
pub table_tag: u32,
|
||||
pub checksum: u32,
|
||||
pub offset: u32,
|
||||
|
@ -36,4 +36,13 @@ impl TableDirectory {
|
|||
|
||||
Ok(TableDirectory(tables))
|
||||
}
|
||||
|
||||
/// Looks up the directory record for the table tagged `table`.
///
/// The first four bytes of `table` are packed big-endian into the `u32` key
/// used by the directory map; any further bytes are ignored.
///
/// Returns `None` when `table` is shorter than four bytes or when the
/// directory has no entry for the tag.
pub fn get_table(&self, table: &str) -> Option<TableDirectoryRecord> {
    // A short tag cannot name a table; report "not found" instead of panicking.
    let tag = table.as_bytes().first_chunk::<4>()?;
    let table_id = u32::from_be_bytes(*tag);

    self.0.get(&table_id).copied()
}
|
||||
|
||||
}
|
||||
|
|
33
src/main.rs
33
src/main.rs
|
@ -1,24 +1,45 @@
|
|||
mod font;
|
||||
|
||||
use std::{fs::File, io::Result};
|
||||
use std::{error::Error, fs::File};
|
||||
|
||||
use clap::Parser;
|
||||
use font::Font;
|
||||
use log::{error, LevelFilter};
|
||||
use simplelog::{ColorChoice, Config, TermLogger, TerminalMode};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct Args {
|
||||
file: String
|
||||
file: String,
|
||||
glyph: char
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
TermLogger::init(
|
||||
LevelFilter::Debug,
|
||||
Config::default(),
|
||||
TerminalMode::Mixed,
|
||||
ColorChoice::Auto
|
||||
)?;
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
let font = Font::new(File::open(args.file)?);
|
||||
let Ok(font) = font else {
|
||||
panic!("{}", font.unwrap_err().to_string());
|
||||
let Ok(mut font) = font else {
|
||||
let err = font.unwrap_err().to_string();
|
||||
error!("{err}");
|
||||
return Err(String::from(err).into());
|
||||
};
|
||||
|
||||
dbg!(font);
|
||||
// dbg!(font);
|
||||
|
||||
match font.get_data_for(args.glyph) {
|
||||
Ok(()) => {},
|
||||
Err(err) => {
|
||||
error!("{err}");
|
||||
return Err(String::from(err).into());
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue