From 46f35a56dec611d06e14b28a52bf45c3bf74f9a1 Mon Sep 17 00:00:00 2001
From: Ron Kuris
Date: Tue, 8 Oct 2024 08:34:33 -1000
Subject: [PATCH] For NodeStoreHeader, use bytemuck not bincode (#723)

---
 firewood/src/db.rs       |  29 +++++----
 firewood/src/manager.rs  |   6 +-
 storage/Cargo.toml       |   2 +
 storage/src/nodestore.rs | 134 +++++++++++++++++++--------------------
 4 files changed, 87 insertions(+), 84 deletions(-)

diff --git a/firewood/src/db.rs b/firewood/src/db.rs
index c60049de5..df5489c1f 100644
--- a/firewood/src/db.rs
+++ b/firewood/src/db.rs
@@ -63,7 +63,10 @@ impl std::fmt::Debug for DbMetrics {
 
 #[async_trait]
 impl api::DbView for HistoricalRev {
-    type Stream<'a> = MerkleKeyValueStream<'a, Self> where Self: 'a;
+    type Stream<'a>
+        = MerkleKeyValueStream<'a, Self>
+    where
+        Self: 'a;
 
     async fn root_hash(&self) -> Result<Option<TrieHash>, api::Error> {
         HashedNodeReader::root_hash(self).map_err(api::Error::IO)
@@ -125,7 +128,10 @@ where
 {
     type Historical = NodeStore<Committed, FileBacked>;
 
-    type Proposal<'p> = Proposal<'p> where Self: 'p;
+    type Proposal<'p>
+        = Proposal<'p>
+    where
+        Self: 'p;
 
     async fn revision(&self, root_hash: TrieHash) -> Result<Arc<Self::Historical>, api::Error> {
         let nodestore = self
@@ -230,7 +236,10 @@ pub struct Proposal<'p> {
 
 #[async_trait]
 impl<'a> api::DbView for Proposal<'a> {
-    type Stream<'b> = MerkleKeyValueStream<'b, NodeStore<Arc<ImmutableProposal>, FileBacked>> where Self: 'b;
+    type Stream<'b>
+        = MerkleKeyValueStream<'b, NodeStore<Arc<ImmutableProposal>, FileBacked>>
+    where
+        Self: 'b;
 
     async fn root_hash(&self) -> Result<Option<TrieHash>, api::Error> {
         self.nodestore.root_hash().map_err(api::Error::from)
@@ -313,15 +322,11 @@ impl<'a> api::Proposal for Proposal<'a> {
 #[cfg(test)]
 #[allow(clippy::unwrap_used)]
 mod test {
-    use std::{
-        ops::{Deref, DerefMut},
-        path::PathBuf,
-    };
-
-    use crate::{
-        db::Db,
-        v2::api::{Db as _, DbView as _, Error, Proposal as _},
-    };
+    use std::ops::{Deref, DerefMut};
+    use std::path::PathBuf;
+
+    use crate::db::Db;
+    use crate::v2::api::{Db as _, DbView as _, Error, Proposal as _};
 
     use super::{BatchOp, DbConfig};
 
diff --git a/firewood/src/manager.rs b/firewood/src/manager.rs
index 8d2a4674d..4753c8dca 100644
--- a/firewood/src/manager.rs
+++ b/firewood/src/manager.rs
@@ -3,11 +3,11 @@
 
 #![allow(dead_code)]
 
-use std::collections::HashMap;
+use std::collections::{HashMap, VecDeque};
+use std::io::Error;
 use std::num::NonZero;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::{collections::VecDeque, io::Error};
 
 use storage::logger::warn;
 use typed_builder::TypedBuilder;
@@ -92,7 +92,7 @@ impl RevisionManager {
         }
 
         if truncate {
-            nodestore.flush_header()?;
+            nodestore.flush_header_with_padding()?;
         }
 
         Ok(manager)
diff --git a/storage/Cargo.toml b/storage/Cargo.toml
index 87b49a0c2..d5a8a2843 100644
--- a/storage/Cargo.toml
+++ b/storage/Cargo.toml
@@ -18,6 +18,8 @@ arc-swap = "1.7.1"
 lru = "0.12.4"
 metrics = "0.23.0"
 log = { version = "0.4.20", optional = true }
+bytemuck = "1.7.0"
+bytemuck_derive = "1.7.0"
 
 [dev-dependencies]
 rand = "0.8.5"
diff --git a/storage/src/nodestore.rs b/storage/src/nodestore.rs
index e932d1a09..6bf1b9955 100644
--- a/storage/src/nodestore.rs
+++ b/storage/src/nodestore.rs
@@ -5,6 +5,7 @@ use crate::logger::trace;
 use arc_swap::access::DynAccess;
 use arc_swap::ArcSwap;
 use bincode::{DefaultOptions, Options as _};
+use bytemuck_derive::{AnyBitPattern, NoUninit};
 use metrics::{counter, histogram};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
@@ -100,9 +101,6 @@ const NUM_AREA_SIZES: usize = AREA_SIZES.len();
 const MIN_AREA_SIZE: u64 = AREA_SIZES[0];
 const MAX_AREA_SIZE: u64 = AREA_SIZES[NUM_AREA_SIZES - 1];
 
-const SOME_FREE_LIST_ELT_SIZE: u64 = 1 + std::mem::size_of::<LinearAddress>() as u64;
-const FREE_LIST_MAX_SIZE: u64 = NUM_AREA_SIZES as u64 * SOME_FREE_LIST_ELT_SIZE;
-
 /// Returns the index in `BLOCK_SIZES` of the smallest block size >= `n`.
 fn area_size_to_index(n: u64) -> Result<AreaIndex, Error> {
     if n > MAX_AREA_SIZE {
@@ -196,13 +194,25 @@ impl<S: ReadableStorage> NodeStore<Committed, S> {
     /// Assumes the header is written in the [ReadableStorage].
     pub fn open(storage: Arc<S>) -> Result<Self, Error> {
         let mut stream = storage.stream_from(0)?;
-
-        let header: NodeStoreHeader = DefaultOptions::new()
-            .deserialize_from(&mut stream)
-            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
+        let mut header = NodeStoreHeader::new();
+        let header_bytes = bytemuck::bytes_of_mut(&mut header);
+        stream.read_exact(header_bytes)?;
 
         drop(stream);
 
+        if header.version != Version::new() {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "Incompatible firewood version",
+            ));
+        }
+        if header.endian_test != 1 {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "Database cannot be opened due to difference in endianness",
+            ));
+        }
+
         let mut nodestore = Self {
             header,
             kind: Committed {
@@ -308,7 +318,7 @@ impl<S: ReadableStorage> NodeStore<MutableProposal, S> {
             parent: parent.kind.as_nodestore_parent(),
         };
         Ok(NodeStore {
-            header: parent.header.clone(),
+            header: parent.header,
             kind,
             storage: parent.storage.clone(),
         })
@@ -335,30 +345,14 @@
     }
 }
 
-impl<S: WritableStorage> NodeStore<Committed, S> {
-    // TODO danlaine: Use this code in the revision management code.
-    // TODO danlaine: Write only the parts of the header that have changed instead of the whole thing
-    // fn write_header(&mut self) -> Result<(), Error> {
-    //     let header_bytes = bincode::serialize(&self.header).map_err(|e| {
-    //         Error::new(
-    //             ErrorKind::InvalidData,
-    //             format!("Failed to serialize free lists: {}", e),
-    //         )
-    //     })?;
-
-    //     self.storage.write(0, header_bytes.as_slice())?;
-
-    //     Ok(())
-    // }
-}
-
 impl<S: WritableStorage> NodeStore<MutableProposal, S> {
     /// Creates a new, empty, [NodeStore] and clobbers the underlying `storage` with an empty header.
+    /// This is used during testing and during the creation of an in-memory merkle for proofs.
     pub fn new_empty_proposal(storage: Arc<S>) -> Self {
         let header = NodeStoreHeader::new();
-        let header_bytes = bincode::serialize(&header).expect("failed to serialize header");
+        let header_bytes = bytemuck::bytes_of(&header);
         storage
-            .write(0, header_bytes.as_slice())
+            .write(0, header_bytes)
             .expect("failed to write header");
         NodeStore {
             header,
@@ -524,7 +518,8 @@ impl From<Error> for UpdateError {
 
 /// Can be used by filesystem tooling such as "file" to identify
 /// the version of firewood used to create this [NodeStore] file.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, NoUninit, AnyBitPattern)]
+#[repr(transparent)]
 struct Version {
     bytes: [u8; 16],
 }
@@ -549,10 +544,13 @@ pub type FreeLists = [Option<LinearAddress>; NUM_AREA_SIZES];
 
 /// Persisted metadata for a [NodeStore].
 /// The [NodeStoreHeader] is at the start of the ReadableStorage.
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
+#[derive(Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Clone, NoUninit, AnyBitPattern)]
+#[repr(C)]
 struct NodeStoreHeader {
     /// Identifies the version of firewood used to create this [NodeStore].
     version: Version,
+    /// Always 1; used to verify endianness.
+    endian_test: u64,
     size: u64,
     /// Element i is the pointer to the first free block of size `BLOCK_SIZES[i]`.
     free_lists: FreeLists,
@@ -560,26 +558,22 @@ struct NodeStoreHeader {
 }
 
 impl NodeStoreHeader {
-    /// The first SIZE bytes of the ReadableStorage are the [NodeStoreHeader].
-    /// The serialized NodeStoreHeader may be less than SIZE bytes but we
-    /// reserve this much space for it since it can grow and it must always be
-    /// at the start of the ReadableStorage so it can't be moved in a resize.
-    const SIZE: u64 = {
-        // 8 and 9 for `size` and `root_address` respectively
-        let max_size = Version::SIZE + 8 + 9 + FREE_LIST_MAX_SIZE;
-        // Round up to the nearest multiple of MIN_AREA_SIZE
-        let remainder = max_size % MIN_AREA_SIZE;
-        if remainder == 0 {
-            max_size
-        } else {
-            max_size + MIN_AREA_SIZE - remainder
-        }
-    };
+    /// The first SIZE bytes of the ReadableStorage are reserved for the
+    /// [NodeStoreHeader].
+    /// We also want it aligned to a disk block.
+    const SIZE: u64 = 2048;
+
+    /// Number of extra bytes to write on the first creation of the NodeStoreHeader
+    /// (zero-padded).
+    /// This is also a compile-time check to prevent setting SIZE too small.
+    const EXTRA_BYTES: usize = Self::SIZE as usize - std::mem::size_of::<NodeStoreHeader>();
 
     fn new() -> Self {
         Self {
             // The store just contains the header at this point
             size: Self::SIZE,
+            endian_test: 1,
             root_address: None,
             version: Version::new(),
             free_lists: Default::default(),
@@ -872,12 +866,22 @@ impl<S: ReadableStorage> NodeStore<Arc<ImmutableProposal>, S> {
 impl<S: WritableStorage> NodeStore<Arc<ImmutableProposal>, S> {
     /// Persist the header from this proposal to storage.
     pub fn flush_header(&self) -> Result<(), Error> {
-        let header_bytes = DefaultOptions::new()
-            .with_varint_encoding()
-            .serialize(&self.header)
-            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
-        self.storage.write(0, header_bytes.as_slice())?;
+        let header_bytes = bytemuck::bytes_of(&self.header);
+        self.storage.write(0, header_bytes)?;
+        Ok(())
+    }
+
+    /// Persist the header, including all the padding.
+    /// This is only done the first time we write the header.
+    pub fn flush_header_with_padding(&self) -> Result<(), Error> {
+        let header_bytes = bytemuck::bytes_of(&self.header)
+            .iter()
+            .copied()
+            .chain(std::iter::repeat(0u8).take(NodeStoreHeader::EXTRA_BYTES))
+            .collect::<Box<[u8]>>();
+        debug_assert_eq!(header_bytes.len(), NodeStoreHeader::SIZE as usize);
+
+        self.storage.write(0, &header_bytes)?;
         Ok(())
     }
 }
@@ -886,13 +890,9 @@ impl<S: WritableStorage> NodeStore<Arc<ImmutableProposal>, S> {
     /// Persist the freelist from this proposal to storage.
     pub fn flush_freelist(&self) -> Result<(), Error> {
         // Write the free lists to storage
-        let free_list_bytes = DefaultOptions::new()
-            .with_varint_encoding()
-            .serialize(&self.header.free_lists)
-            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
+        let free_list_bytes = bytemuck::bytes_of(&self.header.free_lists);
         let free_list_offset = offset_of!(NodeStoreHeader, free_lists) as u64;
-        self.storage
-            .write(free_list_offset, free_list_bytes.as_slice())?;
+        self.storage.write(free_list_offset, free_list_bytes)?;
 
         Ok(())
     }
@@ -924,7 +924,7 @@ impl<S: WritableStorage> NodeStore<Arc<ImmutableProposal>, S> {
     /// This function is used during commit.
     pub fn as_committed(&self) -> NodeStore<Committed, S> {
         NodeStore {
-            header: self.header.clone(),
+            header: self.header,
             kind: Committed {
                 deleted: self.kind.deleted.clone(),
                 root_hash: self.kind.root_hash.clone(),
@@ -938,13 +938,9 @@ impl<S: WritableStorage> NodeStore<Arc<ImmutableProposal>, S> {
     /// Persist the freelist from this proposal to storage.
     pub fn flush_freelist(&self) -> Result<(), Error> {
         // Write the free lists to storage
-        let free_list_bytes = DefaultOptions::new()
-            .with_varint_encoding()
-            .serialize(&self.header.free_lists)
-            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
+        let free_list_bytes = bytemuck::bytes_of(&self.header.free_lists);
         let free_list_offset = offset_of!(NodeStoreHeader, free_lists) as u64;
-        self.storage
-            .write(free_list_offset, free_list_bytes.as_slice())?;
+        self.storage.write(free_list_offset, free_list_bytes)?;
 
         Ok(())
     }
@@ -976,7 +972,7 @@ impl NodeStore<Arc<ImmutableProposal>, FileBacked> {
     /// This function is used during commit.
     pub fn as_committed(&self) -> NodeStore<Committed, FileBacked> {
         NodeStore {
-            header: self.header.clone(),
+            header: self.header,
             kind: Committed {
                 deleted: self.kind.deleted.clone(),
                 root_hash: self.kind.root_hash.clone(),
@@ -1110,7 +1106,8 @@
 #[cfg(test)]
 #[allow(clippy::unwrap_used)]
 mod tests {
-    use crate::{linear::memory::MemStore, BranchNode, LeafNode};
+    use crate::linear::memory::MemStore;
+    use crate::{BranchNode, LeafNode};
     use arc_swap::access::DynGuard;
     use smallvec::SmallVec;
     use test_case::test_case;
@@ -1184,11 +1181,10 @@ mod tests {
         let node_store = NodeStore::new_empty_proposal(memstore.into());
 
         // Check the empty header is written at the start of the ReadableStorage.
-        let mut header_bytes = node_store.storage.stream_from(0).unwrap();
-        let header: NodeStoreHeader = DefaultOptions::new()
-            .with_varint_encoding()
-            .deserialize_from(&mut header_bytes)
-            .unwrap();
+        let mut header = NodeStoreHeader::new();
+        let mut header_stream = node_store.storage.stream_from(0).unwrap();
+        let header_bytes = bytemuck::bytes_of_mut(&mut header);
+        header_stream.read_exact(header_bytes).unwrap();
         assert_eq!(header.version, Version::new());
         let empty_free_list: FreeLists = Default::default();
         assert_eq!(header.free_lists, empty_free_list);
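
Note (appended for review; not part of the patch): the change works because NodeStoreHeader now has a fixed #[repr(C)] layout, so (de)serialization becomes a raw byte copy instead of a bincode encode/decode. The sketch below round-trips a struct through bytemuck::bytes_of / bytemuck::bytes_of_mut, the same calls open(), new_empty_proposal(), and flush_header() use above. DemoHeader and its fields are hypothetical stand-ins, not types from this patch; the sketch assumes the bytemuck and bytemuck_derive crates added in storage/Cargo.toml.

    use bytemuck_derive::{AnyBitPattern, NoUninit};

    // DemoHeader mirrors the requirements the derives place on NodeStoreHeader:
    // Copy, #[repr(C)], and no padding bytes between fields.
    #[derive(Copy, Clone, Debug, PartialEq, NoUninit, AnyBitPattern)]
    #[repr(C)]
    struct DemoHeader {
        endian_test: u64,
        size: u64,
    }

    fn main() {
        let header = DemoHeader { endian_test: 1, size: 2048 };

        // "Serialize": a zero-copy view of the struct's bytes,
        // exactly size_of::<DemoHeader>() long.
        let bytes: &[u8] = bytemuck::bytes_of(&header);
        assert_eq!(bytes.len(), std::mem::size_of::<DemoHeader>());

        // "Deserialize": copy raw bytes back over a mutable instance,
        // the same pattern open() uses with stream.read_exact().
        let mut decoded = DemoHeader { endian_test: 0, size: 0 };
        bytemuck::bytes_of_mut(&mut decoded).copy_from_slice(bytes);
        assert_eq!(decoded, header);

        // The byte layout is host-endian: a file written on an opposite-endian
        // machine would decode endian_test as a byte-swapped value, which is
        // exactly what the `header.endian_test != 1` check in open() rejects.
        assert_eq!(decoded.endian_test, 1);
    }

This is also why the header became Copy (the bytemuck derives require it), which in turn is why `header: parent.header.clone()` and `header: self.header.clone()` could drop their clone() calls.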
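
A second sketch, with the same caveats (Header, counter, and write_counter are hypothetical names, not from the patch), for the partial write flush_freelist performs: because the header is #[repr(C)], a field's in-memory offset from offset_of! matches its position in the on-disk header, so one field can be rewritten in place without touching the rest of the reserved 2048-byte region. std::mem::offset_of! has been stable since Rust 1.77; the patch's unqualified offset_of! may instead come from a macro import such as memoffset.

    use std::fs::File;
    use std::io::{Seek, SeekFrom, Write};
    use std::mem::offset_of;

    // repr(C) fixes field order and offsets, so the in-memory offset of
    // `counter` matches its position in the serialized header on disk.
    #[allow(dead_code)]
    #[repr(C)]
    struct Header {
        version: [u8; 16],
        endian_test: u64,
        size: u64,
        counter: u64,
    }

    fn write_counter(file: &mut File, header: &Header) -> std::io::Result<()> {
        // Seek to the field's offset within the header region at the start
        // of the file...
        file.seek(SeekFrom::Start(offset_of!(Header, counter) as u64))?;
        // ...and overwrite just that field's bytes, leaving the rest of the
        // header (and its zero padding out to the reserved size) untouched.
        file.write_all(&header.counter.to_ne_bytes())
    }

to_ne_bytes() here writes native-endian bytes, the same raw layout bytemuck::bytes_of exposes in the patch itself, which is why the endian_test field guarded in open() stays meaningful for these partial writes too.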