Skip to content

Commit

Permalink
Prefer inline representation over static (#278)
Browse files Browse the repository at this point in the history
* Benchmarks use longer static strings

* Use inline for short strings

Closes #276.
  • Loading branch information
overlookmotel authored Jul 31, 2024
1 parent 1b636e9 commit c8fed62
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 37 deletions.
4 changes: 4 additions & 0 deletions integration-tests/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ fn main() {
"a",
"b",
"address",
"defaults",
"area",
"body",
"font-weight",
"br",
"html",
"head",
"id",
"❤",
"❤💯",
"❤💯❤💯",
])
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs"))
.unwrap()
Expand Down
8 changes: 4 additions & 4 deletions integration-tests/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ bench_all!([eq ne lt clone_string]
for longer_string = super::longer_dynamic_a, super::longer_dynamic_b);

bench_all!([eq ne intern as_ref clone is_static lt]
for static_atom = test_atom!("a"), test_atom!("b"));
for static_atom = test_atom!("defaults"), test_atom!("font-weight"));

bench_all!([intern as_ref clone is_inline]
for short_inline_atom = mk("e"), mk("f"));
Expand All @@ -168,13 +168,13 @@ bench_all!([eq ne intern as_ref clone is_dynamic lt]
for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b));

bench_all!([intern as_ref clone is_static]
for static_at_runtime = mk("a"), mk("b"));
for static_at_runtime = mk("defaults"), mk("font-weight"));

bench_all!([ne lt x_static y_inline]
for static_vs_inline = test_atom!("a"), mk("f"));
for static_vs_inline = test_atom!("defaults"), mk("f"));

bench_all!([ne lt x_static y_dynamic]
for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b));
for static_vs_dynamic = test_atom!("defaults"), mk(super::longer_dynamic_b));

bench_all!([ne lt x_inline y_dynamic]
for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b));
Expand Down
25 changes: 18 additions & 7 deletions integration-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ fn test_as_slice() {
#[test]
fn test_types() {
assert!(Atom::from("").is_static());
assert!(Atom::from("id").is_static());
assert!(Atom::from("body").is_static());
assert!(Atom::from("a").is_static());
assert!(Atom::from("defaults").is_static());
assert!(Atom::from("font-weight").is_static());
assert!(Atom::from("id").is_inline());
assert!(Atom::from("body").is_inline());
assert!(Atom::from("a").is_inline());
assert!(Atom::from("address").is_inline());
assert!(Atom::from("c").is_inline());
assert!(Atom::from("zz").is_inline());
assert!(Atom::from("zzz").is_inline());
Expand Down Expand Up @@ -168,11 +171,13 @@ fn repr() {
// static atom table, the tag values, etc.

// Static atoms
check_static("a", test_atom!("a"));
check_static("address", test_atom!("address"));
check_static("area", test_atom!("area"));
check_static("defaults", test_atom!("defaults"));
check_static("font-weight", test_atom!("font-weight"));

// Inline atoms
check("a", 0x0000_0000_0000_6111);
check("address", 0x7373_6572_6464_6171);
check("area", 0x0000_0061_6572_6141);
check("e", 0x0000_0000_0000_6511);
check("xyzzy", 0x0000_797A_7A79_7851);
check("xyzzy01", 0x3130_797A_7A79_7871);
Expand All @@ -193,8 +198,13 @@ fn test_threads() {

// Verifies that the compile-time `test_atom!` macro and the runtime
// `Atom::from` conversion produce equal atoms for the same string, for
// ASCII and multi-byte UTF-8 inputs of several byte lengths.
#[test]
fn atom_macro() {
// Short ASCII strings: 1, 4 and 7 bytes long.
assert_eq!(test_atom!("a"), Atom::from("a"));
assert_eq!(test_atom!("body"), Atom::from("body"));
assert_eq!(test_atom!("address"), Atom::from("address"));
// Multi-byte UTF-8: "❤" encodes to 3 bytes and "💯" to 4, so these are
// 3 and 7 bytes long — length is counted in bytes, not chars.
assert_eq!(test_atom!("❤"), Atom::from("❤"));
assert_eq!(test_atom!("❤💯"), Atom::from("❤💯"));
// Longer than 7 bytes (11 and 14 bytes respectively).
assert_eq!(test_atom!("font-weight"), Atom::from("font-weight"));
assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯"));
}

#[test]
Expand Down Expand Up @@ -292,7 +302,8 @@ fn test_from_string() {

#[test]
fn test_try_static() {
assert!(Atom::try_static("head").is_some());
assert!(Atom::try_static("defaults").is_some());
assert!(Atom::try_static("head").is_none());
assert!(Atom::try_static("not in the static table").is_none());
}

Expand Down
53 changes: 37 additions & 16 deletions src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,25 @@ impl<Static> Atom<Static> {
}
}

/// Builds an inline atom from pre-packed string bytes. For the atom!() macros.
///
/// `n` holds the string bytes with its bottom 8 bits zero; `len` is the
/// value stored in the length field of the tag byte. The tag and length
/// occupy the bottom byte of the resulting word and the string data the
/// top 7 bytes.
#[inline(always)]
#[doc(hidden)]
pub const fn pack_inline(n: u64, len: u8) -> Self {
    let payload = if cfg!(target_endian = "big") {
        // On big-endian targets the order of the top 7 bytes must be
        // reversed. `to_le()` swaps all 8 bytes, which moves the zero
        // bottom byte to the top; shifting left by 8 discards it again and
        // leaves the bottom 8 bits zero, as required for the tag byte.
        n.to_le() << 8
    } else {
        n
    };

    let header = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);

    // SAFETY: INLINE_TAG ensures this is never zero.
    let unsafe_data = unsafe { NonZeroU64::new_unchecked(header | payload) };

    Self {
        unsafe_data,
        phantom: PhantomData,
    }
}

/// Returns the bits of the packed word selected by `TAG_MASK`, as a `u8`.
fn tag(&self) -> u8 {
    let raw = self.unsafe_data.get();
    (raw & TAG_MASK) as u8
}
Expand Down Expand Up @@ -186,20 +205,22 @@ impl<Static: StaticAtomSet> Hash for Atom<Static> {

impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
fn from(string_to_add: Cow<'a, str>) -> Self {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let len = string_to_add.len();
if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes())
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
let len = string_to_add.len();
if len == 0 {
Self::pack_static(Static::empty_string_index())
} else if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes());
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
let data = ptr.as_ptr() as u64;
debug_assert!(0 == data & TAG_MASK);
Expand All @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
}
})
})
}
}
}

Expand Down
51 changes: 41 additions & 10 deletions string-cache-codegen/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,19 @@ impl AtomType {
// which would cause divisions by zero in rust-phf.
self.atoms.insert(String::new());

let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
let hash_state = phf_generator::generate_hash(&atoms);
// Strings over 7 bytes + empty string added to static set.
// Otherwise stored inline.
let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self
.atoms
.iter()
.map(String::as_str)
.partition(|s| s.len() > 7 || s.is_empty());

// Static strings
let hash_state = phf_generator::generate_hash(&static_strs);
let phf_generator::HashState { key, disps, map } = hash_state;
let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect();
let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect();
let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
let indices = 0..atoms.len() as u32;

Expand Down Expand Up @@ -228,16 +236,33 @@ impl AtomType {
let macro_name = new_term(&*self.macro_name);
let module = module.parse::<proc_macro2::TokenStream>().unwrap();
let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase());
let const_names: Vec<_> = atoms
let new_const_name = |atom: &str| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
}
new_term(&name)
};
let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect();

// Inline strings
let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs
.iter()
.map(|atom| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
.map(|s| {
let const_name = new_const_name(s);

let mut value = 0u64;
for (index, c) in s.bytes().enumerate() {
value = value | ((c as u64) << (index * 8 + 8));
}
new_term(&name)

let len = s.len() as u8;

(const_name, (value, len))
})
.collect();
.unzip();
let (inline_values, inline_lengths): (Vec<_>, Vec<_>) =
inline_values_and_lengths.into_iter().unzip();

quote! {
#atom_doc
Expand Down Expand Up @@ -265,13 +290,19 @@ impl AtomType {
#(
pub const #const_names: #type_name = #type_name::pack_static(#indices);
)*
#(
pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths);
)*

#macro_doc
#[macro_export]
macro_rules! #macro_name {
#(
(#atoms) => { #module::#const_names };
)*
#(
(#inline_strs) => { #module::#inline_const_names };
)*
}
}
}
Expand Down

0 comments on commit c8fed62

Please sign in to comment.