diff --git a/read-fonts/generated/generated_meta.rs b/read-fonts/generated/generated_meta.rs index 6ce66051..c98acd6f 100644 --- a/read-fonts/generated/generated_meta.rs +++ b/read-fonts/generated/generated_meta.rs @@ -13,23 +13,27 @@ pub struct MetaMarker { } impl MetaMarker { - fn version_byte_range(&self) -> Range<usize> { + pub fn version_byte_range(&self) -> Range<usize> { let start = 0; start..start + u32::RAW_BYTE_LEN } - fn flags_byte_range(&self) -> Range<usize> { + + pub fn flags_byte_range(&self) -> Range<usize> { let start = self.version_byte_range().end; start..start + u32::RAW_BYTE_LEN } - fn reserved_byte_range(&self) -> Range<usize> { + + pub fn reserved_byte_range(&self) -> Range<usize> { let start = self.flags_byte_range().end; start..start + u32::RAW_BYTE_LEN } - fn data_maps_count_byte_range(&self) -> Range<usize> { + + pub fn data_maps_count_byte_range(&self) -> Range<usize> { let start = self.reserved_byte_range().end; start..start + u32::RAW_BYTE_LEN } - fn data_maps_byte_range(&self) -> Range<usize> { + + pub fn data_maps_byte_range(&self) -> Range<usize> { let start = self.data_maps_count_byte_range().end; start..start + self.data_maps_byte_len } @@ -114,7 +118,7 @@ impl<'a> std::fmt::Debug for Meta<'a> { } } -/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats +/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats> #[derive(Clone, Debug, Copy, bytemuck :: AnyBitPattern)] #[repr(C)] #[repr(packed)] @@ -138,6 +142,15 @@ impl DataMapRecord { self.data_offset.get() } + /// Offset in bytes from the beginning of the metadata table to the data for this tag. + /// + /// The `data` argument should be retrieved from the parent table + /// By calling its `offset_data` method. + pub fn data<'a>(&self, data: FontData<'a>) -> Result<Metadata<'a>, ReadError> { + let args = (self.tag(), self.data_length()); + self.data_offset().resolve_with_args(data, &args) + } + /// Length of the data, in bytes. The data is not required to be padded to any byte boundary. 
pub fn data_length(&self) -> u32 { self.data_length.get() @@ -155,10 +168,7 @@ impl<'a> SomeRecord<'a> for DataMapRecord { name: "DataMapRecord", get_field: Box::new(move |idx, _data| match idx { 0usize => Some(Field::new("tag", self.tag())), - 1usize => Some(Field::new( - "data_offset", - FieldType::offset_to_array_of_scalars(self.data_offset(), self.data(_data)), - )), + 1usize => Some(Field::new("data_offset", traversal::FieldType::Unknown)), 2usize => Some(Field::new("data_length", self.data_length())), _ => None, }), diff --git a/read-fonts/src/tables/meta.rs b/read-fonts/src/tables/meta.rs index d5ba62f1..56289c29 100644 --- a/read-fonts/src/tables/meta.rs +++ b/read-fonts/src/tables/meta.rs @@ -2,14 +2,76 @@ include!("../../generated/generated_meta.rs"); -impl DataMapRecord { - /// The data under this record, interpreted from length and offset. - pub fn data<'a>(&self, data: FontData<'a>) -> Result<&'a [u8], ReadError> { - let start = self.data_offset().to_usize(); - let end = start + self.data_length() as usize; - - data.as_bytes() - .get(start..end) - .ok_or(ReadError::OutOfBounds) +pub const DLNG: Tag = Tag::new(b"dlng"); +pub const SLNG: Tag = Tag::new(b"slng"); + +/// Data stored in the 'meta' table. 
+pub enum Metadata<'a> { + /// Used for the 'dlng' and 'slng' metadata + ScriptLangTags(VarLenArray<'a, LangScriptTag<'a>>), + /// Other metadata, which may exist in certain apple fonts + Other(&'a [u8]), +} + +impl ReadArgs for Metadata<'_> { + type Args = (Tag, u32); +} + +impl<'a> FontReadWithArgs<'a> for Metadata<'a> { + fn read_with_args(data: FontData<'a>, args: &Self::Args) -> Result<Self, ReadError> { + let (tag, len) = *args; + let data = data.slice(0..len as usize).ok_or(ReadError::OutOfBounds)?; + if [DLNG, SLNG].contains(&tag) { + VarLenArray::read(data).map(Metadata::ScriptLangTags) + } else { + Ok(Metadata::Other(data.as_bytes())) + } + } +} + +pub struct LangScriptTag<'a>(&'a str); + +impl<'a> LangScriptTag<'a> { + pub fn as_str(&self) -> &'a str { + self.0 + } +} + +impl AsRef<str> for LangScriptTag<'_> { + fn as_ref(&self) -> &str { + self.0 + } +} + +#[cfg(feature = "std")] +impl From<LangScriptTag<'_>> for String { + fn from(value: LangScriptTag<'_>) -> Self { + value.0.into() + } +} + +impl VarSize for LangScriptTag<'_> { + type Size = u32; + + fn read_len_at(data: FontData, pos: usize) -> Option<usize> { + let bytes = data.split_off(pos)?.as_bytes(); + if bytes.is_empty() { + return None; + } + // search only the bytes from `pos` onwards, so the length is relative to `pos` + let end = bytes + .iter() + .position(|b| *b == b',') + .map(|idx| idx + 1) // include comma + .unwrap_or(bytes.len()); + Some(end) + } +} + +impl<'a> FontRead<'a> for LangScriptTag<'a> { + fn read(data: FontData<'a>) -> Result<Self, ReadError> { + std::str::from_utf8(data.as_bytes()) + .map_err(|_| ReadError::MalformedData("LangScriptTag must be utf8")) + .map(|s| LangScriptTag(s.trim_matches(',').trim())) } } diff --git a/resources/codegen_inputs/meta.rs b/resources/codegen_inputs/meta.rs index ec43b5d7..d09de831 100644 --- a/resources/codegen_inputs/meta.rs +++ b/resources/codegen_inputs/meta.rs @@ -21,15 +21,18 @@ table Meta { data_maps: [DataMapRecord], } -/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats +/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats> +#[skip_from_obj] record DataMapRecord { /// A tag indicating the 
type of metadata. tag: Tag, /// Offset in bytes from the beginning of the metadata table to the data for this tag. - #[offset_getter(data)] - #[compile_with(compile_map_value)] - data_offset: Offset32<[u8]>, + #[read_offset_with($tag, $data_length)] + #[traverse_with(skip)] + #[validate(validate_data_type)] + data_offset: Offset32<Metadata>, /// Length of the data, in bytes. The data is not required to be padded to any byte boundary. - #[compile(skip)] + #[compile(self.compute_data_len())] data_length: u32, } + diff --git a/write-fonts/generated/generated_meta.rs b/write-fonts/generated/generated_meta.rs index edef6fe5..6e005934 100644 --- a/write-fonts/generated/generated_meta.rs +++ b/write-fonts/generated/generated_meta.rs @@ -70,19 +70,19 @@ impl<'a> FontRead<'a> for Meta { } } -/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats +/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats> #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DataMapRecord { /// A tag indicating the type of metadata. pub tag: Tag, /// Offset in bytes from the beginning of the metadata table to the data for this tag. 
- pub data: OffsetMarker<Vec<u8>, WIDTH_32>, + pub data: OffsetMarker<Metadata, WIDTH_32>, } impl DataMapRecord { /// Construct a new `DataMapRecord` - pub fn new(tag: Tag, data: Vec<u8>) -> Self { + pub fn new(tag: Tag, data: Metadata) -> Self { Self { tag, data: data.into(), @@ -94,7 +94,8 @@ impl FontWrite for DataMapRecord { #[allow(clippy::unnecessary_cast)] fn write_into(&self, writer: &mut TableWriter) { self.tag.write_into(writer); - (self.compile_map_value()).write_into(writer); + self.data.write_into(writer); + (self.compute_data_len() as u32).write_into(writer); } fn table_type(&self) -> TableType { TableType::Named("DataMapRecord") @@ -102,14 +103,11 @@ impl FontWrite for DataMapRecord { } impl Validate for DataMapRecord { - fn validate_impl(&self, _ctx: &mut ValidationCtx) {} -} - -impl FromObjRef<read_fonts::tables::meta::DataMapRecord> for DataMapRecord { - fn from_obj_ref(obj: &read_fonts::tables::meta::DataMapRecord, offset_data: FontData) -> Self { - DataMapRecord { - tag: obj.tag(), - data: obj.data(offset_data).to_owned_obj(offset_data), - } + fn validate_impl(&self, ctx: &mut ValidationCtx) { + ctx.in_table("DataMapRecord", |ctx| { + ctx.in_field("data", |ctx| { + self.validate_data_type(ctx); + }); + }) } } diff --git a/write-fonts/src/tables/meta.rs b/write-fonts/src/tables/meta.rs index 7f4f4c23..d489c04e 100644 --- a/write-fonts/src/tables/meta.rs +++ b/write-fonts/src/tables/meta.rs @@ -1,29 +1,144 @@ //! The [meta (Metadata)](https://docs.microsoft.com/en-us/typography/opentype/spec/meta) table +use std::fmt::Display; + include!("../../generated/generated_meta.rs"); -impl DataMapRecord { - /// Required to append a variable length slice of bytes at the end of the - /// table, referenced by length and offset in this record. - fn compile_map_value(&self) -> MapValueAndLenWriter { - MapValueAndLenWriter(self.data.as_slice()) +pub const DLNG: Tag = Tag::new(b"dlng"); +pub const SLNG: Tag = Tag::new(b"slng"); + +/// Metadata in the `meta` table, associated with some tag. 
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Metadata { + /// For the 'dlng' and 'slng' tags + ScriptLangTags(Vec<ScriptLangTag>), + /// For other tags + Other(Vec<u8>), +} + +/// A [`ScriptLangTag`] value. +/// +/// This is currently just a string and we do not perform any validation, +/// but we should do that (TODO: open an issue to track this) +/// +/// [`ScriptLangTag`]: https://learn.microsoft.com/en-us/typography/opentype/spec/meta#scriptlangtag-values +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ScriptLangTag(String); + +/// An error for if a [`ScriptLangTag`] does not conform to the specification. +#[derive(Clone, Debug)] +#[non_exhaustive] // so we can flesh this out later without breaking anything +pub struct InvalidScriptLangTag; + +impl ScriptLangTag { + pub fn new(raw: String) -> Result<Self, InvalidScriptLangTag> { + Ok(Self(raw)) + } + + pub fn as_str(&self) -> &str { + self.0.as_str() } } -struct MapValueAndLenWriter<'a>(&'a [u8]); +impl Display for InvalidScriptLangTag { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("ScriptLangTag was malformed") + } +} -impl FontWrite for MapValueAndLenWriter<'_> { - fn write_into(&self, writer: &mut TableWriter) { - let length = u32::try_from(self.0.len()).expect("meta record data too long: exceeds u32"); +impl std::error::Error for InvalidScriptLangTag {} - writer.write_offset(&self.0, 4); - length.write_into(writer); +impl DataMapRecord { + fn validate_data_type(&self, ctx: &mut ValidationCtx) { + if matches!( + (self.tag, self.data.as_ref()), + (SLNG | DLNG, Metadata::Other(_)) + ) { + ctx.report("'slng' or 'dlng' tags use ScriptLangTag data"); + } } + + /// The value of the record's `data_length` field: the serialized size of the data, in bytes. + /// + /// Panics if the data is longer than `u32::MAX` bytes. + fn compute_data_len(&self) -> u32 { + let len = match self.data.as_ref() { + Metadata::ScriptLangTags(langs) => { + let tags_len: usize = langs.iter().map(|lang| lang.0.len()).sum(); + // one comma between each adjacent pair of tags + tags_len + langs.len().saturating_sub(1) + } + Metadata::Other(bytes) => bytes.len(), + }; + u32::try_from(len).expect("meta record data too long: exceeds u32") + } } -// TODO: is this necessary? 
-impl FontWrite for &[u8] { +impl FontWrite for Metadata { fn write_into(&self, writer: &mut TableWriter) { - writer.write_slice(self); + // Only the data itself is written here: its length belongs to the parent + // record (see `DataMapRecord::compute_data_len`), not to the data it points at. + match self { + Metadata::ScriptLangTags(langs) => { + for (idx, lang) in langs.iter().enumerate() { + if idx > 0 { + b','.write_into(writer); + } + lang.0.as_bytes().write_into(writer); + } + } + Metadata::Other(bytes) => bytes.write_into(writer), + } + } +} + +impl Validate for Metadata { + fn validate_impl(&self, _ctx: &mut ValidationCtx) {} +} + +impl FromObjRef<read_fonts::tables::meta::Metadata<'_>> for Metadata { + fn from_obj_ref(from: &read_fonts::tables::meta::Metadata<'_>, _: FontData) -> Self { + match from { + read_fonts::tables::meta::Metadata::ScriptLangTags(var_len_array) => { + Self::ScriptLangTags( + var_len_array + .iter() + .flat_map(|x| { + x.ok() + .and_then(|x| ScriptLangTag::new(x.as_str().into()).ok()) + }) + .collect(), + ) + } + read_fonts::tables::meta::Metadata::Other(bytes) => Self::Other(bytes.to_vec()), + } + } +} + +impl FromTableRef<read_fonts::tables::meta::Metadata<'_>> for Metadata {} + +// Note: This is required because of generated trait bounds, but we don't really +// want to use it because we want our metadata to match our tag... +impl Default for Metadata { + fn default() -> Self { + Metadata::ScriptLangTags(Vec::new()) + } +} + +impl FromObjRef<read_fonts::tables::meta::DataMapRecord> for DataMapRecord { + fn from_obj_ref(obj: &read_fonts::tables::meta::DataMapRecord, offset_data: FontData) -> Self { + let data = obj + .data(offset_data) + .map(|meta| meta.to_owned_table()) + .unwrap_or_else(|_| match obj.tag() { + DLNG | SLNG => Metadata::ScriptLangTags(Vec::new()), + _ => Metadata::Other(Vec::new()), + }); + DataMapRecord { + tag: obj.tag(), + data: OffsetMarker::new(data), + } } }