add glyph id mapping

This commit is contained in:
lauchmelder 2025-03-02 16:34:28 +01:00
parent 4b1d8718d6
commit db07f3772b
7 changed files with 396 additions and 18 deletions

112
Cargo.lock generated
View file

@ -106,13 +106,24 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
]
[[package]]
name = "font-explorer"
version = "0.1.0"
dependencies = [
"bincode",
"clap",
"log",
"serde",
"simplelog",
]
[[package]]
@ -127,6 +138,45 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "libc"
version = "0.2.170"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
[[package]]
name = "log"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num_threads"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
dependencies = [
"libc",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro2"
version = "1.0.89"
@ -165,6 +215,17 @@ dependencies = [
"syn",
]
[[package]]
name = "simplelog"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0"
dependencies = [
"log",
"termcolor",
"time",
]
[[package]]
name = "strsim"
version = "0.11.1"
@ -182,6 +243,48 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "time"
version = "0.3.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
dependencies = [
"deranged",
"itoa",
"libc",
"num-conv",
"num_threads",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "unicode-ident"
version = "1.0.13"
@ -194,6 +297,15 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View file

@ -6,4 +6,6 @@ edition = "2021"
[dependencies]
bincode = "1.3.3"
clap = { version = "4.5.21", features = ["derive"] }
log = "0.4.26"
serde = { version = "1.0.215", features = ["derive", "serde_derive"] }
simplelog = { version = "0.12.2", features = ["termcolor"] }

View file

@ -1,29 +1,56 @@
mod table_directory;
mod character_map;
mod search;
use std::io::{BufReader, Read, Seek};
use std::{fs::File, io::{BufReader, Read}};
use bincode::{Options, Result};
use bincode::Options;
use character_map::CharacterMap;
use log::{debug, info};
use table_directory::TableDirectory;
fn deserialize<R: Read + Seek, T: serde::de::DeserializeOwned>(reader: R) -> Result<T> {
fn deserialize<R: Read, T: serde::de::DeserializeOwned>(reader: R) -> bincode::Result<T> {
bincode::options().with_big_endian().with_fixint_encoding().deserialize_from::<R, T>(reader)
}
#[derive(Debug)]
pub struct Font<R: Read + Seek> {
data_source: BufReader<R>,
table_directory: TableDirectory
pub struct Font {
data_source: BufReader<File>,
table_directory: TableDirectory,
character_map: CharacterMap
}
impl<R: Read + Seek> Font<R> {
pub fn new(reader: R) -> Result<Self> {
let mut buf_reader = BufReader::new(reader);
impl Font {
pub fn new(file: File) -> bincode::Result<Self> {
let mut buf_reader = BufReader::new(file.try_clone()?);
let table_directory = TableDirectory::new(&mut buf_reader)?;
let Some(cmap) = table_directory.get_table("cmap") else {
return Err(Box::new(bincode::ErrorKind::Custom("Missing table 'cmap'".into())));
};
let character_map = CharacterMap::new(file.try_clone()?, cmap)?;
debug!("{character_map:#?}");
Ok(Font {
data_source: buf_reader,
table_directory: table_directory
table_directory,
character_map
})
}
/// Resolves `glyph` to its glyph id and logs the result.
///
/// # Errors
/// Returns a static message when the font has no `glyf` table or when the
/// character map has no glyph id for `glyph`.
pub fn get_data_for(&mut self, glyph: char) -> Result<(), &'static str> {
    // The `glyf` table is only checked for presence; its record is not used yet.
    if self.table_directory.get_table("glyf").is_none() {
        return Err("Failed to get table. It doesn't exist");
    }

    let glyph_id = self
        .character_map
        .get_char(glyph)
        .ok_or("Failed to get glyph id for char")?;

    info!("glyph id for char '{glyph}' is {glyph_id}");
    Ok(())
}
}

161
src/font/character_map.rs Normal file
View file

@ -0,0 +1,161 @@
use std::{fs::File, io::{BufReader, Read, Seek, SeekFrom}};
use bincode::{ErrorKind, Result};
use log::{debug, error, warn};
use serde::{de::DeserializeOwned, Deserialize};
use super::{deserialize, table_directory::TableDirectoryRecord, search::SearchParameters};
/// Two 16-bit fields deserialized at the start of `SegmentToDelta::new`,
/// directly before the `SearchParameters`.
///
/// Neither field is read anywhere in the visible code (hence the `_` prefix);
/// they are kept so the reader advances past them.
#[derive(Debug, Deserialize, Copy, Clone)]
struct SegmentToDeltaHeader {
// presumably the byte length of the subtable — TODO confirm against the cmap format 4 layout
_length: u16,
// presumably the subtable's language code — TODO confirm against the cmap format 4 layout
_language: u16,
}
/// In-memory form of the segment mapping subtable used to translate 16-bit
/// character codes into glyph ids.
///
/// The four `Vec` fields are parallel arrays with one entry per segment
/// (`search_params.seg_count_x2 >> 1` entries each).
#[derive(Debug)]
struct SegmentToDelta {
// Reader used to seek to and read individual glyph entries on demand.
data_source: BufReader<File>,
// Header fields read before the search parameters; not used after loading.
header: SegmentToDeltaHeader,
// Segment count and binary-search hints read from the subtable.
search_params: SearchParameters,
// Last character code of each segment.
end_code: Vec<u16>,
// First character code of each segment.
start_code: Vec<u16>,
// Per-segment delta added to the character code when the range offset is 0.
id_delta: Vec<i16>,
// Per-segment offset into the glyph data; 0 selects the delta path instead.
id_range_offset: Vec<u16>,
// Stream position recorded right after `id_range_offset` was read.
glyph_table: u32
}
impl SegmentToDelta {
fn get_next_vec<R: Read + Seek, T: DeserializeOwned>(reader: &mut R, seg_count: u16) -> Result<Vec<T>> {
let mut values: Vec<T> = vec!();
for _ in 0..seg_count {
values.push(deserialize(reader.by_ref())?);
}
Ok(values)
}
fn new<R: Read + Seek>(reader: &mut R, file: File) -> Result<SegmentToDelta> {
let header: SegmentToDeltaHeader = deserialize(reader.by_ref())?;
let search_params: SearchParameters = deserialize(reader.by_ref())?;
let end_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
reader.seek_relative(2)?;
let start_code = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
let id_delta = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
let id_range_offset = Self::get_next_vec(reader, search_params.seg_count_x2 >> 1)?;
Ok(SegmentToDelta {
data_source: BufReader::new(file),
glyph_table: reader.stream_position()? as u32,
header,
search_params,
end_code,
start_code,
id_delta,
id_range_offset
})
}
fn get_glyph_id(&mut self, c: u16) -> Option<u16> {
// Find index of first endcode that is greater or equal to c
let index =self.search_params.find_first_goe(self.end_code.as_slice(), c)?;
debug!("First endcode that is >={c} has index {index} (={})", self.end_code[index]);
if self.start_code[index] > c {
return None
}
let id_range_offset = self.id_range_offset[index];
debug!("range offset = {id_range_offset}");
if id_range_offset == 0 {
debug!("delta = {}", self.id_delta[index]);
return Some((c as i16 + self.id_delta[index]) as u16 % u16::MAX);
}
let glyph_map_offset = ((id_range_offset >> 1) as usize) + ((c - self.start_code[index]) as usize) - index;
if let Err(error) = self.data_source.seek(SeekFrom::Start(self.glyph_table as u64 + glyph_map_offset as u64)) {
error!("{error}");
return None;
}
if let Ok(glyph) = deserialize::<_, u16>(self.data_source.by_ref()) {
return Some(glyph);
}
return None;
}
}
/// Character-to-glyph mapping loaded from a font's `cmap` table.
#[derive(Debug)]
pub struct CharacterMap {
// Copy of the `offset` field of the `cmap` directory record; not read in the visible code.
_table_start: u32,
// Copy of the `length` field of the `cmap` directory record; not read in the visible code.
_length: u32,
// The subtable that performs the actual code-to-glyph lookups.
mapping_table: SegmentToDelta
}
/// One entry of the list that follows the `cmap` table's table count.
///
/// Deserialized field by field, so the declaration order is the on-disk order.
#[derive(Debug, Deserialize)]
struct EncodingRecord {
// Platform identifier; `find_unicode_table` looks for the value 0.
platform: u16,
// Platform-specific encoding identifier; `find_unicode_table` looks for the value 3.
encoding: u16,
// Offset of the subtable, added to the `cmap` table offset when seeking to it.
offset: u32,
}
impl CharacterMap {
    /// Reads up to `num_tables` encoding records from `reader` and returns the
    /// first one with platform 0 and encoding 3.
    ///
    /// Returns `None` if no such record exists or a record cannot be read.
    fn find_unicode_table<R: Read + Seek>(reader: &mut R, num_tables: u16) -> Option<EncodingRecord> {
        for _ in 0..num_tables {
            let record: EncodingRecord = deserialize(reader.by_ref()).ok()?;
            if record.platform == 0 && record.encoding == 3 {
                return Some(record);
            }
        }

        None
    }

    /// Loads the character map described by the `cmap` directory record from `file`.
    ///
    /// # Errors
    /// Returns an error when reading or seeking fails, when the table has no
    /// platform 0 / encoding 3 record, or when the selected subtable uses a
    /// format other than 4 (the only one implemented).
    pub fn new(file: File, cmap: TableDirectoryRecord) -> Result<CharacterMap> {
        let mut reader = BufReader::new(file.try_clone()?);
        let table_start = u64::from(cmap.offset);

        // The table begins with a 16-bit version field that is not needed here.
        reader.seek(SeekFrom::Start(table_start))?;
        reader.seek_relative(2)?;

        let num_tables: u16 = deserialize(reader.by_ref())?;
        let table = Self::find_unicode_table(&mut reader, num_tables).ok_or_else(|| {
            Box::new(ErrorKind::Custom("No encoding for Unicode 2.0 BMP".into()))
        })?;

        // Widen before adding so corrupt offsets cannot overflow `u32`.
        reader.seek(SeekFrom::Start(table_start + u64::from(table.offset)))?;

        let subtable_format: u16 = deserialize(reader.by_ref())?;
        if subtable_format != 4 {
            // Report unsupported formats to the caller instead of panicking.
            return Err(Box::new(ErrorKind::Custom(format!(
                "Unsupported cmap subtable format {subtable_format} (only format 4 is implemented)"
            ))));
        }

        Ok(CharacterMap {
            mapping_table: SegmentToDelta::new(&mut reader, file)?,
            _table_start: cmap.offset,
            _length: cmap.length,
        })
    }

    /// Returns the glyph id for `c`, or `None` if the font does not map it.
    ///
    /// Only code points that fit into 16 bits can be looked up; anything
    /// larger is logged as a warning and yields `None`.
    pub fn get_char(&mut self, c: char) -> Option<u16> {
        let code = u32::from(c);
        let Ok(code) = u16::try_from(code) else {
            warn!("Codepoint {code} for '{c}' is outside the Basic Multilingual Plane and cannot be looked up");
            return None;
        };

        debug!("Getting glyph id for codepoint {code}");
        self.mapping_table.get_glyph_id(code)
    }
}

46
src/font/search.rs Normal file
View file

@ -0,0 +1,46 @@
use std::fmt::Debug;
use log::debug;
use serde::Deserialize;
/// Element count and binary-search hints that are stored in the font file in
/// front of a sorted `u16` array.
///
/// Deserialized field by field, so the declaration order is the on-disk order.
#[derive(Debug, Copy, Clone, Deserialize, Default)]
pub struct SearchParameters {
pub seg_count_x2: u16, // number of array elements times two; shift right by one to get the count
pub search_range: u16, // NOTE(review): OpenType defines this as 2 * (largest power of two <= element count) — confirm
pub entry_selector: u16, // number of halving steps of a binary search over the power-of-two prefix
pub _range_shift: u16 // not read by the visible code; presumably seg_count_x2 - search_range — TODO confirm
}
impl SearchParameters {
fn binary_search(&self, array: &[u16], target: u16) -> Option<usize> {
let mut index = (self.search_range >> 2) as usize;
for level in 1..=self.entry_selector {
let end_code = array[index];
if end_code == target {
return Some(index);
}
if end_code < target {
index += (self.search_range >> (level + 2)) as usize;
} else {
index -= (self.search_range >> (level + 2)) as usize;
}
}
Some(index + 1)
}
pub fn find_first_goe(&self, array: &[u16], target: u16) -> Option<usize> {
if target > array[(self.seg_count_x2 as usize >> 1) - 1] {
return None;
}
if target > array[(self.search_range as usize >> 1) - 1] {
debug!("{}", array[(self.search_range as usize >> 1) - 1]);
todo!()
}
self.binary_search(array, target)
}
}

View file

@ -11,8 +11,8 @@ struct TableDirectoryHeader {
num_tables: u16,
}
#[derive(Deserialize, Debug)]
struct TableDirectoryRecord {
#[derive(Deserialize, Debug, Copy, Clone)]
pub struct TableDirectoryRecord {
pub table_tag: u32,
pub checksum: u32,
pub offset: u32,
@ -36,4 +36,13 @@ impl TableDirectory {
Ok(TableDirectory(tables))
}
/// Looks up the directory record of the table whose tag is `table`.
///
/// The first four bytes of `table` are packed big-endian into the `u32` key
/// used by the directory; any further bytes are ignored.
///
/// Returns `None` when `table` is shorter than four bytes or when the
/// directory contains no record for that tag.
pub fn get_table(&self, table: &str) -> Option<TableDirectoryRecord> {
    let tag = table.as_bytes().first_chunk::<4>()?;
    self.0.get(&u32::from_be_bytes(*tag)).copied()
}
}

View file

@ -1,24 +1,45 @@
mod font;
use std::{fs::File, io::Result};
use std::{error::Error, fs::File};
use clap::Parser;
use font::Font;
use log::{error, LevelFilter};
use simplelog::{ColorChoice, Config, TermLogger, TerminalMode};
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
file: String
file: String,
glyph: char
}
fn main() -> Result<()> {
fn main() -> Result<(), Box<dyn Error>> {
TermLogger::init(
LevelFilter::Debug,
Config::default(),
TerminalMode::Mixed,
ColorChoice::Auto
)?;
let args = Args::parse();
let font = Font::new(File::open(args.file)?);
let Ok(font) = font else {
panic!("{}", font.unwrap_err().to_string());
let Ok(mut font) = font else {
let err = font.unwrap_err().to_string();
error!("{err}");
return Err(String::from(err).into());
};
dbg!(font);
// dbg!(font);
match font.get_data_for(args.glyph) {
Ok(()) => {},
Err(err) => {
error!("{err}");
return Err(String::from(err).into());
}
};
Ok(())
}