From 566ad561e58051d3606c5852173181b776be23fe Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Sun, 27 Oct 2024 20:55:07 +0530 Subject: [PATCH] add binary_codec_sv2 doc --- .../no-serde-sv2/codec/src/codec/decodable.rs | 115 ++++++++++++++---- .../no-serde-sv2/codec/src/codec/mod.rs | 2 +- .../codec/src/datatypes/copy_data_types.rs | 16 +-- .../no-serde-sv2/codec/src/datatypes/mod.rs | 7 ++ .../datatypes/non_copy_data_types/inner.rs | 2 +- .../binary-sv2/no-serde-sv2/codec/src/lib.rs | 59 ++++++++- 6 files changed, 158 insertions(+), 43 deletions(-) diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/decodable.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/decodable.rs index 8c1786c0c..8a9e44d17 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/decodable.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/decodable.rs @@ -10,16 +10,35 @@ use std::convert::TryFrom; #[cfg(not(feature = "no_std"))] use std::io::{Cursor, Read}; -/// Implmented by all the decodable structure, it can be derived for any structure composed only -/// of primitives or other Decodable types. It defines methods to parse the structure from raw -/// data and reconstruct it from decoded fields. +/// Trait that defines how a type can be decoded from raw byte data. +/// +/// This trait describes the process of decoding a data structure from a sequence of bytes. +/// Implementations use a combination of methods to extract the structure of the data, decode its +/// fields, and then construct the type from those decoded fields. It is designed to handle both +/// simple types and nested or complex data structures. +/// +/// - `get_structure`: Describes the layout of the type's fields, allowing the decoder to break down the raw data. +/// - `from_decoded_fields`: Reconstructs the type from individual decoded fields. +/// - `from_bytes`: High-level method that manages the decoding process from raw bytes. 
+/// - `from_reader`: Reads and decodes data from a stream, useful when working with I/O sources like files or network sockets. pub trait Decodable<'a>: Sized { + /// Returns the structure of the type. + /// + /// This method defines the layout of the data fields within the type. The structure + /// returned is used to split raw data into individual fields that can be decoded. fn get_structure(data: &[u8]) -> Result, Error>; + /// Constructs the type from decoded fields. + /// + /// After the data has been split into fields, this method combines those fields + /// back into the original type, handling nested structures or composite fields. fn from_decoded_fields(data: Vec>) -> Result; - /// Parses a structure from raw bytes by iterating through its fields and decoding them. - /// Splits the data based on field size and decodes each segment. + /// Decodes the type from raw bytes. + /// + /// This method orchestrates the decoding process, calling `get_structure` to break down + /// the raw data, decoding each field, and then using `from_decoded_fields` to reassemble + /// the fields into the original type. fn from_bytes(data: &'a mut [u8]) -> Result { let structure = Self::get_structure(data)?; let mut fields = Vec::new(); @@ -37,8 +56,11 @@ pub trait Decodable<'a>: Sized { Self::from_decoded_fields(fields) } - /// Reads a structure from a reader stream, Reads all available data into a buffer, - /// determines the structure, and then decodes each field from the buffer. + /// Decodes the type from a reader stream. + /// + /// Instead of working directly with byte slices, this method reads from an I/O source + /// like a file or a network stream. It processes all available data, decodes it, and + /// reconstructs the type. #[cfg(not(feature = "no_std"))] fn from_reader(reader: &mut impl Read) -> Result { let mut data = Vec::new(); @@ -56,8 +78,10 @@ pub trait Decodable<'a>: Sized { } } -/// Enum representing different types of primitive markers. 
-/// Used to define the structure of primitive data types for decoding. +/// Enum representing primitive data markers. +/// +/// These markers are used to identify primitive types such as integers, booleans, and byte arrays. +/// Each variant represents a specific type and is used during decoding to interpret raw data correctly. #[derive(Debug, Clone, Copy)] pub enum PrimitiveMarker { U8, @@ -77,22 +101,30 @@ pub enum PrimitiveMarker { B016M, } -/// Enum representing field markers, which define the structure of a data field. -/// Fields can be primitives or nested structures. +/// Enum representing field markers used to describe data structure. +/// +/// A `FieldMarker` can either be a primitive or a nested structure. The marker helps the decoder +/// understand the layout and type of each field in the data, guiding the decoding process. #[derive(Debug, Clone)] pub enum FieldMarker { Primitive(PrimitiveMarker), Struct(Vec), } -/// A trait that provides a mechanism to retrieve the marker associated with a data field. -/// This marker is used to help the decoder to identify and interpret the field type. +/// Trait that provides a mechanism to retrieve the marker associated with a data field. +/// +/// This trait defines a method for getting the marker that represents the structure or +/// type of a given field. It is used to assist in decoding by indicating how to interpret +/// the data. pub trait GetMarker { fn get_marker() -> FieldMarker; } -/// Represents a decoded primitive data type, used by the decoder to construct messages, -/// Includes various types like integers, floats and binary data. +/// Represents a decoded primitive data type. +/// +/// After decoding, the raw data is transformed into one of these variants, which represent +/// standard primitive types like integers or binary arrays. The decoder uses these values to +/// build the final structure of the message. 
#[derive(Debug)] pub enum DecodablePrimitive<'a> { U8(u8), @@ -112,16 +144,22 @@ pub enum DecodablePrimitive<'a> { B016M(B016M<'a>), } -/// Represents a decoded field, which may either be a primitive or a nested structure. -/// The decoder uses this to build the final decoded data. +/// Represents a decoded field, which may be primitive or a nested structure. +/// +/// Once the raw data is decoded, it is either classified as a primitive (e.g., integer, boolean) +/// or a structure, which may itself contain multiple decoded fields. This type encapsulates that +/// distinction. #[derive(Debug)] pub enum DecodableField<'a> { Primitive(DecodablePrimitive<'a>), Struct(Vec>), } -/// Provide a size hint for each primitive marker. -/// This method helps estimate the size of the data field represented by the marker. +/// Provides size hinting for each primitive marker. +/// +/// This implementation defines how to estimate the size of data represented by a `PrimitiveMarker`. +/// This is useful for efficient decoding, allowing the decoder to correctly split raw data into +/// fields of the right size. impl SizeHint for PrimitiveMarker { // PrimitiveMarker need introspection to return a size hint. This method is not implementeable fn size_hint(_data: &[u8], _offset: usize) -> Result { @@ -149,8 +187,10 @@ impl SizeHint for PrimitiveMarker { } } -/// Provides a size hint for each field marker, which may be a primitive or a nested structure. -/// Used to estimate the total size of data associated with a field. +/// Provides size hinting for each field marker, including nested structures. +/// +/// This method defines how to estimate the size of a field, whether it's a primitive or a +/// composite structure. For composite fields, it recursively calculates the total size. impl SizeHint for FieldMarker { // FieldMarker need introspection to return a size hint. 
This method is not implementeable fn size_hint(_data: &[u8], _offset: usize) -> Result { @@ -188,7 +228,10 @@ impl SizeHint for Vec { } } -/// Converts a `PrimitiveMarker` into a `FieldMarker` +/// Converts a `PrimitiveMarker` into a `FieldMarker`. +/// +/// This conversion allows primitive types to be represented as field markers, which can +/// then be used in the decoding process. impl From for FieldMarker { fn from(v: PrimitiveMarker) -> Self { FieldMarker::Primitive(v) @@ -196,7 +239,9 @@ impl From for FieldMarker { } /// Attempts to convert a vector of field markers into a single field marker, representing a structure. -/// Returns an error if the vector is empty. +/// +/// This conversion is useful for handling cases where a sequence of field markers is intended +/// to represent a composite structure. If the vector is empty, an error is returned. impl TryFrom> for FieldMarker { type Error = crate::Error; @@ -213,6 +258,9 @@ impl TryFrom> for FieldMarker { } } +/// Converts a `DecodableField` into a vector of `DecodableField`s. +/// If the field is a primitive, it wraps it in a vector. +/// If the field is a structure, it returns the nested fields directly. impl<'a> From> for Vec> { fn from(v: DecodableField<'a>) -> Self { match v { @@ -222,9 +270,14 @@ impl<'a> From> for Vec> { } } -/// Defines the decoding process for a primitive marker, which parses a segment of data -/// and returns the corresponding `DecodablePrimitive`. + +/// Implements the decoding process for a `PrimitiveMarker`. +/// Given a slice of data and an offset, this method parses the corresponding data and returns +/// a `DecodablePrimitive`. This is the core mechanism for decoding primitive types like integers, +/// booleans, and fixed-length byte arrays from raw byte data. impl PrimitiveMarker { + /// Decodes a primitive value from a byte slice at the given offset, returning the corresponding + /// `DecodablePrimitive`. 
The specific decoding logic depends on the type of the primitive (e.g., `u8`, `u16`, etc.). fn decode<'a>(&self, data: &'a mut [u8], offset: usize) -> DecodablePrimitive<'a> { match self { Self::U8 => DecodablePrimitive::U8(u8::from_bytes_unchecked(&mut data[offset..])), @@ -257,6 +310,11 @@ impl PrimitiveMarker { } } + /// Decodes a primitive value from a reader stream, returning the corresponding + /// `DecodablePrimitive`. This is useful when reading data from a file or network socket, + /// where the data is not immediately available as a slice but must be read incrementally. + #[allow(clippy::wrong_self_convention)] + #[cfg(not(feature = "no_std"))] #[allow(clippy::wrong_self_convention)] #[cfg(not(feature = "no_std"))] #[allow(clippy::wrong_self_convention)] @@ -309,8 +367,11 @@ impl<'a> GetSize for DecodablePrimitive<'a> { } } -/// Provides decoding functionality for a field marker by using the marker type to decode the -/// corresponding data and return a DecodableFiel + +/// Implements the decoding functionality for a `FieldMarker`. +/// Depending on whether the field is primitive or structured, this method decodes the corresponding data. +/// If the field is a structure, it recursively decodes each nested field and returns the resulting +/// `DecodableField`. impl FieldMarker { pub(crate) fn decode<'a>(&self, data: &'a mut [u8]) -> Result, Error> { match self { diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/mod.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/mod.rs index 32ff3193f..388ed9b31 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/mod.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/codec/mod.rs @@ -56,7 +56,7 @@ pub trait Fixed { const SIZE: usize; } -/// The `Variable` trait is designed for types that have variable size when encoded. +/// The `Variable` trait is designed for types that have variable size when encoded. 
/// /// Types implementing this trait provide the following: /// /// diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/copy_data_types.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/copy_data_types.rs index 3d3fc52bd..810da6d2b 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/copy_data_types.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/copy_data_types.rs @@ -11,14 +11,6 @@ impl Fixed for bool { const SIZE: usize = 1; } -/// Boolean data type implementation. This provides encoding and decoding for boolean values. -/// A boolean is encoded as an unsigned 1-bit integer: `true` as 1 and `false` as 0. The remaining -/// 7 bits are padding. Recipients must only interpret the least significant bit (LSB). -/// -/// Behavior: -/// - Encoding: `true` is encoded as `1`, `false` as `0`. -/// - Decoding: Only the LSB is considered. -/// - Future-proofing: Senders may set bits outside the LSB without affecting the meaning. impl<'a> Sv2DataType<'a> for bool { fn from_bytes_unchecked(data: &'a mut [u8]) -> Self { match data @@ -84,7 +76,11 @@ impl Fixed for u64 { const SIZE: usize = 8; } -/// Macro for implementing the `Sv2DataType` trait for unsigned integer types. +/// Macro to implement the `Sv2DataType` trait for unsigned integer types. +/// +/// This macro simplifies encoding and decoding for various unsigned integer types, making them +/// compatible with the SV2 protocol. Each implementation uses the little-endian byte order for +/// serialization and deserialization, ensuring consistency across platforms. macro_rules! impl_sv2_for_unsigned { ($a:ty) => { impl<'a> Sv2DataType<'a> for $a { @@ -139,7 +135,7 @@ impl Fixed for f32 { impl_sv2_for_unsigned!(f32); -// Implementation of a 24-bit unsigned integer (`U24`). +/// Represents a 24-bit unsigned integer (`U24`), supporting SV2 serialization and deserialization. 
#[repr(C)] #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub struct U24(pub(crate) u32); diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/mod.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/mod.rs index a0db2e678..597e9fd0e 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/mod.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/mod.rs @@ -35,15 +35,20 @@ pub trait Sv2DataType<'a>: Sized + SizeHint + GetSize + TryInto { Ok(Self::from_bytes_unchecked(data)) } + /// Constructs an instance from a mutable byte slice without verifying size constraints. fn from_bytes_unchecked(data: &'a mut [u8]) -> Self; + /// Constructs an instance from a vector, checking for the correct size. fn from_vec_(data: Vec) -> Result; + /// Constructs an instance from a vector without validating its size. fn from_vec_unchecked(data: Vec) -> Self; + /// Constructs an instance from a reader source, checking for size constraints. #[cfg(not(feature = "no_std"))] fn from_reader_(reader: &mut impl Read) -> Result; + /// Serializes the instance to a mutable slice, checking the destination size. fn to_slice(&'a self, dst: &mut [u8]) -> Result { if dst.len() >= self.get_size() { self.to_slice_unchecked(dst); @@ -53,8 +58,10 @@ pub trait Sv2DataType<'a>: Sized + SizeHint + GetSize + TryInto { } } + /// Serializes the instance to a mutable slice without checking the destination size. fn to_slice_unchecked(&'a self, dst: &mut [u8]); + /// Serializes the instance to a writer destination, checking for I/O errors. 
#[cfg(not(feature = "no_std"))] fn to_writer_(&self, writer: &mut impl Write) -> Result<(), E>; } diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/non_copy_data_types/inner.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/non_copy_data_types/inner.rs index 8c1c5f310..8fc7fe189 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/non_copy_data_types/inner.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/datatypes/non_copy_data_types/inner.rs @@ -23,7 +23,7 @@ use std::io::{Error as E, Read, Write}; /// - `ISFIXED`: A boolean indicating whether the data has a fixed size. /// - `SIZE`: The size of the data if `ISFIXED` is true. /// - `HEADERSIZE`: The size of the header, which is used for types that require a -/// prefixv to describe the content's length. +/// prefix to describe the content's length. /// - `MAXSIZE`: The maximum allowable size for the data. #[repr(C)] #[derive(Debug)] diff --git a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/lib.rs b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/lib.rs index 929fb07a0..bc74af5e4 100644 --- a/protocols/v2/binary-sv2/no-serde-sv2/codec/src/lib.rs +++ b/protocols/v2/binary-sv2/no-serde-sv2/codec/src/lib.rs @@ -1,3 +1,7 @@ +//! This module defines types, encodings, and conversions between Serde and SV2 protocols, +//! providing abstractions to facilitate encoding, decoding, and error handling of SV2 data types. +//! +//! ## Type Mappings //! ```txt //! SERDE <-> Sv2 //! bool <-> BOOL @@ -35,6 +39,7 @@ pub use crate::codec::{ Fixed, GetSize, SizeHint, }; +/// Converts the provided SV2 data type to a byte vector based on the SV2 encoding format. #[allow(clippy::wrong_self_convention)] pub fn to_bytes(src: T) -> Result, Error> { let mut result = vec![0_u8; src.get_size()]; @@ -42,12 +47,14 @@ pub fn to_bytes(src: T) -> Result, Error> { Ok(result) } +/// Encodes the SV2 data type to the provided byte slice. 
#[allow(clippy::wrong_self_convention)] pub fn to_writer(src: T, dst: &mut [u8]) -> Result<(), Error> { src.to_bytes(dst)?; Ok(()) } +/// Decodes an SV2-encoded byte slice into the specified data type. pub fn from_bytes<'a, T: Decodable<'a>>(data: &'a mut [u8]) -> Result { T::from_bytes(data) } @@ -64,6 +71,30 @@ pub mod encodable { #[macro_use] extern crate alloc; +/// Error types used within the protocol library to indicate various failure conditions. +/// +/// - `OutOfBound`: Indicates an attempt to read beyond a valid range. +/// - `NotABool(u8)`: Raised when a non-binary value is interpreted as a boolean. +/// - `WriteError(usize, usize)`: Occurs when an unexpected size mismatch arises during a write operation, specifying expected and actual sizes. +/// - `U24TooBig(u32)`: Signifies an overflow condition where a `u32` exceeds the maximum allowable `u24` value. +/// - `InvalidSignatureSize(usize)`: Reports a size mismatch for a signature, such as when it does not match the expected size. +/// - `InvalidU256(usize)`: Raised when a `u256` value is invalid, typically due to size discrepancies. +/// - `InvalidU24(u32)`: Indicates an invalid `u24` representation. +/// - `InvalidB0255Size(usize)`, `InvalidB064KSize(usize)`, `InvalidB016MSize(usize)`: Indicate that a byte array exceeds the maximum allowed size for `B0255`, `B064K`, or `B016M` types, respectively. +/// - `InvalidSeq0255Size(usize)`: Raised when the size of a sequence exceeds the limit of the `Seq0255` type. +/// - `NonPrimitiveTypeCannotBeEncoded`: Error indicating an attempt to encode a complex type as a primitive. +/// - `PrimitiveConversionError`: Generic conversion error related to primitive types. +/// - `DecodableConversionError`: Error occurring during decoding due to conversion issues. +/// - `UnInitializedDecoder`: Error triggered when a decoder is used without initialization. +/// - `IoError`: Represents I/O-related errors, compatible with `no_std` mode where specific error types may vary. 
+/// - `ReadError(usize, usize)`: Raised when an unexpected mismatch occurs during read operations, specifying expected and actual read sizes. +/// - `VoidFieldMarker`: Used as a marker error for fields that should remain void or empty. +/// - `ValueExceedsMaxSize(bool, usize, usize, usize, Vec, usize)`: Signifies a value overflow based on protocol restrictions, containing details about fixed/variable size, maximum size allowed, and the offending value details. +/// - `SeqExceedsMaxSize`: Triggered when a sequence type (`Seq0255`, `Seq064K`) exceeds its maximum allowable size. +/// - `NoDecodableFieldPassed`: Raised when no valid decodable field is provided during decoding. +/// - `ValueIsNotAValidProtocol(u8)`: Error for protocol-specific invalid values. +/// - `UnknownMessageType(u8)`: Raised when an unsupported or unknown message type is encountered. +/// - `Sv2OptionHaveMoreThenOneElement(u8)`: Indicates a protocol constraint violation where `Sv2Option` unexpectedly contains multiple elements. #[derive(Debug, PartialEq, Eq, Clone)] pub enum Error { OutOfBound, @@ -110,7 +141,7 @@ impl From for Error { } } -/// FFI-safe Error +/// `CError` is a foreign function interface (FFI)-compatible version of the `Error` enum to facilitate cross-language compatibility. #[repr(C)] #[derive(Debug)] pub enum CError { @@ -241,6 +272,8 @@ impl<'a> From for EncodableField<'a> { } } + +/// A struct to facilitate transferring a `Vec` across FFI boundaries. #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct CVec { @@ -250,6 +283,13 @@ pub struct CVec { } impl CVec { + + /// Returns a mutable slice of the contained data. + /// + /// # Safety + /// + /// The caller must ensure that the data pointed to by `self.data` + /// remains valid for the duration of the returned slice. 
pub fn as_mut_slice(&mut self) -> &mut [u8] { unsafe { core::slice::from_raw_parts_mut(self.data, self.len) } } @@ -289,10 +329,12 @@ impl From<&[u8]> for CVec { } } -/// Given a C allocated buffer return a rust allocated CVec +/// Creates a `CVec` from a buffer that was allocated in C. /// /// # Safety /// +/// The caller must ensure that the buffer is valid and that +/// the data length does not exceed the allocated size. #[no_mangle] pub unsafe extern "C" fn cvec_from_buffer(data: *const u8, len: usize) -> CVec { let input = std::slice::from_raw_parts(data, len); @@ -312,6 +354,7 @@ pub unsafe extern "C" fn cvec_from_buffer(data: *const u8, len: usize) -> CVec { } } +/// A struct to manage a collection of `CVec` objects across FFI boundaries. #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct CVec2 { @@ -331,10 +374,12 @@ impl From for Vec { } } +/// Frees the underlying memory of a `CVec`. pub fn free_vec(buf: &mut CVec) { let _: Vec = unsafe { Vec::from_raw_parts(buf.data, buf.len, buf.capacity) }; } +/// Frees the underlying memory of a `CVec2` and all its elements. pub fn free_vec_2(buf: &mut CVec2) { let vs: Vec = unsafe { Vec::from_raw_parts(buf.data, buf.len, buf.capacity) }; for mut s in vs { @@ -378,8 +423,11 @@ impl<'a, const A: bool, const B: usize, const C: usize, const D: usize> } } +/// Initializes an empty `CVec2`. +/// /// # Safety /// +/// The caller is responsible for freeing the `CVec2` when it is no longer needed. #[no_mangle] pub unsafe extern "C" fn init_cvec2() -> CVec2 { let mut buffer = Vec::::new(); @@ -396,10 +444,12 @@ pub unsafe extern "C" fn init_cvec2() -> CVec2 { } } -/// The caller is reponsible for NOT adding duplicate cvecs to the cvec2 structure, -/// as this can lead to double free errors when the message is dropped. +/// Adds a `CVec` to a `CVec2`. +/// /// # Safety /// +/// The caller must ensure no duplicate `CVec`s are added, as duplicates may +/// lead to double-free errors when the message is dropped. 
#[no_mangle] pub unsafe extern "C" fn cvec2_push(cvec2: &mut CVec2, cvec: CVec) { let mut buffer: Vec = Vec::from_raw_parts(cvec2.data, cvec2.len, cvec2.capacity); @@ -445,6 +495,7 @@ impl<'a, T: Into> From> for CVec2 { } } +/// Exported FFI functions for interoperability with C code. #[no_mangle] pub extern "C" fn _c_export_u24(_a: U24) {} #[no_mangle]