Use a cache eviction logic that matches our BlsCache usage pattern.
AmineKhaldi committed Oct 28, 2024
1 parent d4a1a53 commit 4b60431
Showing 7 changed files with 140 additions and 16 deletions.
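For context, here is a minimal usage sketch of the new evict API, pieced together from the test and benchmark code added in this commit. The capacity, seeds, message, and main wrapper below are illustrative, not part of the crate; only BlsCache::new, aggregate_verify, len, and evict are taken from the diff.

use std::num::NonZeroUsize;

use chia_bls::{sign, BlsCache, SecretKey};

fn main() {
    // Illustrative capacity; the default BlsCache holds 50_000 entries.
    let mut cache = BlsCache::new(NonZeroUsize::new(5).unwrap());

    // Verify a few (pubkey, message) pairs, which populates the cache.
    let mut pks_msgs = Vec::new();
    for i in 1..=5u8 {
        let sk = SecretKey::from_seed(&[i; 32]);
        let pk = sk.public_key();
        let msg = [42u8; 32];
        let sig = sign(&sk, msg);
        assert!(cache.aggregate_verify([(pk, msg)], &sig));
        pks_msgs.push((pk, msg));
    }
    assert_eq!(cache.len(), 5);

    // Drop the cached pairings for pairs that are no longer needed.
    let to_evict = vec![pks_msgs[0], pks_msgs[2]];
    cache.evict(to_evict.iter().cloned());
    assert_eq!(cache.len(), 3);
}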
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion Cargo.toml
@@ -131,7 +131,6 @@ hex = "0.4.3"
thiserror = "1.0.63"
pyo3 = "0.22.5"
arbitrary = "1.3.2"
lru = "0.12.4"
rand = "0.8.5"
criterion = "0.5.1"
rstest = "0.22.0"
1 change: 0 additions & 1 deletion crates/chia-bls/Cargo.toml
@@ -26,7 +26,6 @@ hex = { workspace = true }
thiserror = { workspace = true }
pyo3 = { workspace = true, features = ["multiple-pymethods"], optional = true }
arbitrary = { workspace = true, optional = true }
lru = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
27 changes: 26 additions & 1 deletion crates/chia-bls/benches/cache.rs
@@ -2,7 +2,7 @@ use chia_bls::aggregate_verify
use chia_bls::{sign, BlsCache, SecretKey, Signature};
use criterion::{criterion_group, criterion_main, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand::{seq::SliceRandom, Rng, SeedableRng};

fn cache_benchmark(c: &mut Criterion) {
let mut rng = StdRng::seed_from_u64(1337);
@@ -76,6 +76,31 @@ fn cache_benchmark(c: &mut Criterion) {
));
});
});

// Add more pairs to the cache so we can evict a relatively larger number
for i in 1_000..20_000 {
let derived = sk.derive_hardened(i);
let pk = derived.public_key();
let sig = sign(&derived, msg);
agg_sig.aggregate(&sig);
pks.push(pk);
}
bls_cache.aggregate_verify(
pks[1_000..20_000].iter().zip([&msg].iter().cycle()),
&agg_sig,
);

c.bench_function("bls_cache.evict", |b| {
let mut cache = bls_cache.clone();
let mut pks_shuffled = pks.clone();
pks_shuffled.shuffle(&mut rng);
b.iter(|| {
if cache.is_empty() {
return;
}
cache.evict(pks_shuffled.iter().zip([&msg].iter().cycle()));
});
});
}

criterion_group!(cache, cache_benchmark);
124 changes: 112 additions & 12 deletions crates/chia-bls/src/bls_cache.rs
@@ -1,8 +1,8 @@
use std::borrow::Borrow;
use std::collections::{HashMap, HashSet, VecDeque};
use std::num::NonZeroUsize;

use chia_sha2::Sha256;
use lru::LruCache;
use std::sync::Mutex;

use crate::{aggregate_verify_gt, hash_to_g2};
@@ -17,16 +17,37 @@ use crate::{GTElement, PublicKey, Signature};
/// However, when validating a signature for which we have no cached GT elements,
/// the aggregate_verify() primitive is faster. When long-syncing, that's
/// preferable.

#[derive(Debug, Clone)]
struct BlsCacheData {
// sha256(pubkey + message) -> GTElement
items: HashMap<[u8; 32], GTElement>,
insertions_order: VecDeque<[u8; 32]>,
capacity: NonZeroUsize,
}

impl BlsCacheData {
pub fn put(&mut self, hash: [u8; 32], pairing: GTElement) {
// If the cache is full, remove the oldest item.
if self.items.len() == self.capacity.get() {
if let Some(oldest_key) = self.insertions_order.pop_front() {
self.items.remove(&oldest_key);
}
}
self.items.insert(hash, pairing);
self.insertions_order.push_back(hash);
}
}

#[cfg_attr(feature = "py-bindings", pyo3::pyclass(name = "BLSCache"))]
#[derive(Debug)]
pub struct BlsCache {
// sha256(pubkey + message) -> GTElement
cache: Mutex<LruCache<[u8; 32], GTElement>>,
cache: Mutex<BlsCacheData>,
}

impl Default for BlsCache {
fn default() -> Self {
Self::new(NonZeroUsize::new(50000).unwrap())
Self::new(NonZeroUsize::new(50_000).unwrap())
}
}

@@ -39,18 +60,22 @@ impl Clone for BlsCache {
}

impl BlsCache {
pub fn new(cache_size: NonZeroUsize) -> Self {
pub fn new(capacity: NonZeroUsize) -> Self {
Self {
cache: Mutex::new(LruCache::new(cache_size)),
cache: Mutex::new(BlsCacheData {
items: HashMap::new(),
insertions_order: VecDeque::new(),
capacity,
}),
}
}

pub fn len(&self) -> usize {
self.cache.lock().expect("cache").len()
self.cache.lock().expect("cache").items.len()
}

pub fn is_empty(&self) -> bool {
self.cache.lock().expect("cache").is_empty()
self.cache.lock().expect("cache").items.is_empty()
}

pub fn aggregate_verify<Pk: Borrow<PublicKey>, Msg: AsRef<[u8]>>(
@@ -67,7 +92,7 @@ impl BlsCache {
let hash: [u8; 32] = hasher.finalize();

// If the pairing is in the cache, we don't need to recalculate it.
if let Some(pairing) = self.cache.lock().expect("cache").get(&hash).cloned() {
if let Some(pairing) = self.cache.lock().expect("cache").items.get(&hash).cloned() {
return pairing;
}

@@ -88,6 +113,25 @@ impl BlsCache {
let hash: [u8; 32] = hasher.finalize();
self.cache.lock().expect("cache").put(hash, gt);
}

pub fn evict<Pk, Msg>(&mut self, pks_msgs: impl IntoIterator<Item = (Pk, Msg)>)
where
Pk: Borrow<PublicKey>,
Msg: AsRef<[u8]>,
{
let mut hashes_to_remove = HashSet::new();
for (pk, msg) in pks_msgs {
let mut hasher = Sha256::new();
let mut aug_msg = pk.borrow().to_bytes().to_vec();
aug_msg.extend_from_slice(msg.as_ref());
hasher.update(&aug_msg);
let hash: [u8; 32] = hasher.finalize();
hashes_to_remove.insert(hash);
}
let mut c = self.cache.lock().expect("cache");
c.items.retain(|h, _| !hashes_to_remove.contains(h));
c.insertions_order.retain(|h| !hashes_to_remove.contains(h));
}
}

#[cfg(feature = "py-bindings")]
@@ -148,7 +192,7 @@ impl BlsCache {
use pyo3::types::PyBytes;
let ret = PyList::empty_bound(py);
let c = self.cache.lock().expect("cache");
for (key, value) in &*c {
for (key, value) in &c.items {
ret.append((PyBytes::new_bound(py, key), value.clone().into_py(py)))?;
}
Ok(ret.into())
@@ -167,6 +211,19 @@ impl BlsCache {
}
Ok(())
}

#[pyo3(name = "evict")]
pub fn py_evict(&mut self, pks: &Bound<'_, PyList>, msgs: &Bound<'_, PyList>) -> PyResult<()> {
let pks = pks
.iter()?
.map(|item| item?.extract())
.collect::<PyResult<Vec<PublicKey>>>()?;
let msgs = msgs
.iter()?
.map(|item| item?.extract())
.collect::<PyResult<Vec<PyBackedBytes>>>()?;
Ok(self.evict(pks.into_iter().zip(msgs)))
}
}

#[cfg(test)]
@@ -261,7 +318,7 @@ pub mod tests {
}

// The cache should be full now.
assert_eq!(bls_cache.cache.lock().expect("cache").len(), 3);
assert_eq!(bls_cache.len(), 3);

// Recreate first key.
let sk = SecretKey::from_seed(&[1; 32]);
@@ -275,7 +332,9 @@
let hash: [u8; 32] = hasher.finalize();

// The first key should have been removed, since it's the oldest that's been accessed.
assert!(!bls_cache.cache.lock().expect("cache").contains(&hash));
let c = bls_cache.cache.lock().expect("cache");
assert!(!c.items.contains_key(&hash));
assert!(!c.insertions_order.contains(&hash));
}

#[test]
@@ -286,4 +345,45 @@

assert!(bls_cache.aggregate_verify(pks_msgs, &Signature::default()));
}

#[test]
fn test_evict() {
let mut bls_cache = BlsCache::new(NonZeroUsize::new(5).unwrap());
// Create 5 pk msg pairs and add them to the cache.
let mut pks_msgs = Vec::new();
for i in 1..=5 {
let sk = SecretKey::from_seed(&[i; 32]);
let pk = sk.public_key();
let msg = [42; 32];
let sig = sign(&sk, msg);
pks_msgs.push((pk, msg));
assert!(bls_cache.aggregate_verify([(pk, msg)], &sig));
}
assert_eq!(bls_cache.len(), 5);
// Evict the first and third entries.
let pks_msgs_to_evict = vec![pks_msgs[0], pks_msgs[2]];
bls_cache.evict(pks_msgs_to_evict.iter().cloned());
// The cache should have 3 items now.
assert_eq!(bls_cache.len(), 3);
// Check that the evicted entries are no longer in the cache.
for (pk, msg) in &pks_msgs_to_evict {
let aug_msg = [&pk.to_bytes(), msg.as_ref()].concat();
let mut hasher = Sha256::new();
hasher.update(aug_msg);
let hash: [u8; 32] = hasher.finalize();
let c = bls_cache.cache.lock().expect("cache");
assert!(!c.items.contains_key(&hash));
assert!(!c.insertions_order.contains(&hash));
}
// Check that the remaining entries are still in the cache.
for (pk, msg) in &[pks_msgs[1], pks_msgs[3], pks_msgs[4]] {
let aug_msg = [&pk.to_bytes(), msg.as_ref()].concat();
let mut hasher = Sha256::new();
hasher.update(aug_msg);
let hash: [u8; 32] = hasher.finalize();
let c = bls_cache.cache.lock().expect("cache");
assert!(c.items.contains_key(&hash));
assert!(c.insertions_order.contains(&hash));
}
}
}
1 change: 1 addition & 0 deletions wheel/generate_type_stubs.py
@@ -368,6 +368,7 @@ def len(self) -> int: ...
def aggregate_verify(self, pks: list[G1Element], msgs: list[bytes], sig: G2Element) -> bool: ...
def items(self) -> list[tuple[bytes, GTElement]]: ...
def update(self, other: Sequence[tuple[bytes, GTElement]]) -> None: ...
def evict(self, pks: list[G1Element], msgs: list[bytes]) -> None: ...
@final
class AugSchemeMPL:
1 change: 1 addition & 0 deletions wheel/python/chia_rs/chia_rs.pyi
@@ -99,6 +99,7 @@ class BLSCache:
def aggregate_verify(self, pks: list[G1Element], msgs: list[bytes], sig: G2Element) -> bool: ...
def items(self) -> list[tuple[bytes, GTElement]]: ...
def update(self, other: Sequence[tuple[bytes, GTElement]]) -> None: ...
def evict(self, pks: list[G1Element], msgs: list[bytes]) -> None: ...

@final
class AugSchemeMPL:
