From 80e59c1741117a69c5611f3a4c2bfe6cf0ef1ca1 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Mon, 26 Aug 2019 16:50:06 +0300 Subject: [PATCH] Document the semantics of Atom's generic parameter Provide a short example that shows why interning temporary atoms will not blow up memory consumption. Fixes #212 --- src/atom.rs | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c05ef1d..21f9e64 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -193,8 +193,46 @@ pub type DefaultAtom = Atom; /// Represents a string that has been interned. /// -/// In reality this contains a complex packed datastructure and the methods to extract information -/// from it, along with type information to tell the compiler which static set it corresponds to. +/// While the type definition for `Atom` indicates that it is generic on a particular +/// implementation of an atom set, you don't need to worry about this. Atoms can be static +/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they +/// can be dynamic and created by you on an `EmptyStaticAtomSet`. +/// +/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted; +/// this means that you may need to `.clone()` an atom to keep copies of it in different +/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`. +/// +/// ## Creating an atom at runtime +/// +/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code +/// may then do something like read data from somewhere and extract tokens that need to be +/// compared to the atoms. In this case, you can use `Atom::from(&str)` or +/// `Atom::from(String)`. These create a reference-counted atom which will be +/// automatically freed when all references to it are dropped. 
+/// +/// This means that your application can safely have a loop which tokenizes data, creates +/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without +/// running the risk of arbitrary memory consumption from creating large numbers of atoms — +/// as long as your application does not store clones of the atoms it creates along the +/// way. +/// +/// For example, the following is safe and will not consume arbitrary amounts of memory: +/// +/// ```ignore +/// let untrusted_data = "large amounts of text ..."; +/// +/// for token in untrusted_data.split_whitespace() { +/// let atom = Atom::from(token); // interns the string +/// +/// if atom == Atom::from("keyword") { +/// // handle that keyword +/// } else if atom == Atom::from("another_keyword") { +/// // handle that keyword +/// } else { +/// println!("unknown keyword"); +/// } +/// } // atom is dropped here, so it is not kept around in memory +/// ``` pub struct Atom { /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field.