[Data Converter] add experimental dat-style serialization (#2347)
I did some silly hacking late last week that might get us marginally
closer to unifying all the data marshaling under a single tool.

This adds the ability to specify `--to dat` (or one of its aliases)
and an output directory, which will generate the hex-encoded files that
Verilator/Icarus expect. There are some minor differences from the dat
files generated by the Python flow. Mainly, the Python flow truncates
leading zeroes in the encoding, while I've elected to retain them in
the interest of keeping things simple. Python generates:
```
4B
53
21
5D
1E
5E
2B
B
3C
60
```
while the data-converter generates:
```
0000004B
00000053
00000021
0000005D
0000001E
0000005E
0000002B
0000000B
0000003C
00000060
```
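
For illustration, here is a minimal sketch (not the converter's actual code) of how one little-endian memory entry can be rendered in this fixed-width style, two hex digits per byte with the most significant byte on the left; the `format_dat_line` name is hypothetical:
```
// Hypothetical helper: render one little-endian entry as a fixed-width hex
// line, keeping leading zeroes (e.g. [0x4B, 0, 0, 0] -> "0000004B").
fn format_dat_line(entry_le_bytes: &[u8]) -> String {
    entry_le_bytes
        .iter()
        .rev() // dat files put the most significant byte first
        .map(|byte| format!("{byte:02X}"))
        .collect()
}

fn main() {
    assert_eq!(format_dat_line(&[0x4B, 0x00, 0x00, 0x00]), "0000004B");
    assert_eq!(format_dat_line(&[0x0B, 0x00, 0x00, 0x00]), "0000000B");
}
```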

This also adds the ability to deserialize this style of data dump, but it
is slightly brittle at the moment since it expects the following:
- the data header is exactly the one used by the tool and is CBOR-encoded
in a file named `header`. The Python flow currently JSON-encodes this
information in a file named `shape`
- the input data includes all leading zeroes

Both of these assumptions can probably be relaxed in the future in the
interest of robustness.
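
As a rough sketch of the read-back path under these assumptions (the `read_dat_file` name, `path`, and `bytes_per_entry` are placeholders, not the tool's real API), each line is split into byte pairs and reversed back into little-endian order:
```
use std::fs::File;
use std::io::{BufRead, BufReader};

// Hypothetical sketch: read one memory's dat file back into little-endian
// bytes, assuming every line carries the full entry width with leading
// zeroes included (two hex digits per byte, most significant byte first).
fn read_dat_file(path: &str, bytes_per_entry: usize) -> std::io::Result<Vec<u8>> {
    let mut data = Vec::new();
    for line in BufReader::new(File::open(path)?).lines() {
        let line = line?;
        let mut entry: Vec<u8> = (0..line.len())
            .step_by(2)
            .map(|i| u8::from_str_radix(&line[i..i + 2], 16).expect("invalid hex"))
            .collect();
        assert_eq!(entry.len(), bytes_per_entry, "unexpected entry width");
        entry.reverse(); // back to the little-endian order the DataDump stores
        data.extend(entry);
    }
    Ok(data)
}
```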
EclecticGriffin authored Nov 11, 2024
1 parent fd43d16 commit 5bd3599
Showing 2 changed files with 192 additions and 36 deletions.
30 changes: 21 additions & 9 deletions interp/src/serialization/data_dump.rs
@@ -148,6 +148,10 @@ impl MemoryDeclaration {
self.format.width()
}

pub fn bytes_per_entry(&self) -> u32 {
self.format.width().div_ceil(8)
}

pub fn signed(&self) -> bool {
self.format.signed()
}
@@ -172,6 +176,17 @@ impl DataHeader {
.iter()
.fold(0, |acc, mem| acc + mem.byte_count())
}

pub fn serialize(&self) -> Result<Vec<u8>, SerializationError> {
let mut header_str = Vec::new();
ciborium::ser::into_writer(&self, &mut header_str)?;
Ok(header_str)
}

pub fn deserialize(data: &[u8]) -> Result<Self, SerializationError> {
let header: Self = ciborium::from_reader(data)?;
Ok(header)
}
}

#[derive(Debug, PartialEq)]
@@ -237,13 +252,11 @@ impl DataDump {
self.push_memory(declaration, data)
}

// TODO Griffin: handle the errors properly
pub fn serialize(
pub fn serialize<W: std::io::Write>(
&self,
writer: &mut dyn std::io::Write,
mut writer: W,
) -> Result<(), SerializationError> {
let mut header_str = Vec::new();
ciborium::ser::into_writer(&self.header, &mut header_str)?;
let header_str = self.header.serialize()?;
writer.write_all(&Self::MAGIC_NUMBER)?;

let len_bytes: u32 = header_str
@@ -257,9 +270,8 @@ impl DataDump {
Ok(())
}

// TODO Griffin: handle the errors properly
pub fn deserialize(
reader: &mut dyn std::io::Read,
pub fn deserialize<R: std::io::Read>(
mut reader: R,
) -> Result<Self, SerializationError> {
let mut magic_number = [0u8; 4];
reader.read_exact(&mut magic_number).map_err(|e| {
@@ -291,7 +303,7 @@ impl DataDump {
SerializationError::IoError(e)
}
})?;
let header: DataHeader = ciborium::from_reader(raw_header.as_slice())?;
let header = DataHeader::deserialize(&raw_header)?;

let mut data: Vec<u8> = Vec::with_capacity(header.data_size());

198 changes: 171 additions & 27 deletions tools/cider-data-converter/src/main.rs
@@ -1,9 +1,11 @@
use argh::FromArgs;
use cider_data_converter::{converter, json_data::JsonData};
use interp::serialization::{self, SerializationError};
use core::str;
use interp::serialization::{self, DataDump, SerializationError};
use itertools::Itertools;
use std::{
fs::File,
io::{self, Read, Write},
io::{self, BufRead, BufReader, BufWriter, Read, Write},
path::PathBuf,
str::FromStr,
};
@@ -12,6 +14,8 @@ use thiserror::Error;
const JSON_EXTENSION: &str = "data";
const CIDER_EXTENSION: &str = "dump";

const HEADER_FILENAME: &str = "header";

#[derive(Error)]
enum CiderDataConverterError {
#[error("Failed to read file: {0}")]
@@ -36,18 +40,27 @@ impl std::fmt::Debug for CiderDataConverterError {
}
}

enum Action {
ToDataDump,
ToJson,
/// What are we converting the input to
#[derive(Debug, Clone, Copy)]
enum Target {
/// Cider's Single-file DataDump format
DataDump,
/// Verilator/icarus directory format
Dat,
/// Human readable output JSON
Json,
}

impl FromStr for Action {
impl FromStr for Target {
type Err = CiderDataConverterError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"json" => Ok(Action::ToJson),
"cider" | "dump" | "data-dump" => Ok(Action::ToDataDump),
"json" => Ok(Target::Json),
"cider" | "dump" | "data-dump" => Ok(Target::DataDump),
"dat" | "verilog-dat" | "verilog" | "verilator" | "icarus" => {
Ok(Target::Dat)
}
_ => Err(CiderDataConverterError::BadToArgument(s.to_string())),
}
}
@@ -71,7 +84,7 @@ struct Opts {
/// optional specification of what action to perform. Can be "cider" or
/// "json". If not provided, the converter will try to guess based on file names
#[argh(option, short = 't', long = "to")]
action: Option<Action>,
action: Option<Target>,

/// whether to use quotes around floating point numbers in the output. This
/// exists solely for backwards compatibility with the old display format.
@@ -82,18 +95,6 @@ struct Opts {
fn main() -> Result<(), CiderDataConverterError> {
let mut opts: Opts = argh::from_env();

let mut input: Box<dyn Read> = opts
.input_path
.as_ref()
.map(|path| File::open(path).map(|x| Box::new(x) as Box<dyn Read>))
.unwrap_or(Ok(Box::new(io::stdin())))?;

let mut output: Box<dyn Write> = opts
.output_path
.as_ref()
.map(|path| File::create(path).map(|x| Box::new(x) as Box<dyn Write>))
.unwrap_or(Ok(Box::new(io::stdout())))?;

// if no action is specified, try to guess based on file extensions
if opts.action.is_none()
&& (opts.input_path.as_ref().is_some_and(|x| {
@@ -102,28 +103,95 @@ fn main() -> Result<(), CiderDataConverterError> {
x.extension().map_or(false, |y| y == CIDER_EXTENSION)
}))
{
opts.action = Some(Action::ToDataDump);
opts.action = Some(Target::DataDump);
} else if opts.action.is_none()
&& (opts.output_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == JSON_EXTENSION)
}) || opts.input_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == CIDER_EXTENSION)
}))
{
opts.action = Some(Action::ToJson);
opts.action = Some(Target::Json);
}

if let Some(action) = opts.action {
match action {
Action::ToDataDump => {
Target::DataDump => {
let (mut input, mut output) = get_io_handles(&opts)?;

let parsed_json: JsonData =
serde_json::from_reader(&mut input)?;
converter::convert_to_data_dump(&parsed_json, opts.round_float)
.serialize(&mut output)?;
}
Action::ToJson => {
let data_dump =
serialization::DataDump::deserialize(&mut input)?;
Target::Json => {
let data_dump = if let Some(path) = &opts.input_path {
if path.is_dir() {
// we are converting from a dat directory rather than a
// dump

let header = {
let mut header_file =
File::open(path.join(HEADER_FILENAME))?;
let mut raw_header = vec![];
header_file.read_to_end(&mut raw_header)?;

serialization::DataHeader::deserialize(&raw_header)?
};

let mut data: Vec<u8> = vec![];

for mem_dec in &header.memories {
let starting_len = data.len();
let mem_file = BufReader::new(File::open(
path.join(&mem_dec.name),
)?);

let mut line_data = vec![];
for line in mem_file.lines() {
let line = line?;
for pair in &line.chars().chunks(2) {
// there has got to be a better way to do this...
let string =
pair.into_iter().collect::<String>();
let val = u8::from_str_radix(&string, 16)
.expect("invalid hex");
line_data.push(val);
}
// TODO griffin: handle inputs that are
// truncated or otherwise shorter than expected

assert!(
line_data.len()
== (mem_dec.bytes_per_entry() as usize)
);
// reverse the byte order to get the expected
// little endian and reuse the vec
data.extend(line_data.drain(..).rev())
}

assert_eq!(
data.len() - starting_len,
mem_dec.byte_count()
);
}

DataDump { header, data }
} else {
// we are converting from a dump file
serialization::DataDump::deserialize(
&mut get_read_handle(&opts)?,
)?
}
} else {
// we are converting from a dump file
serialization::DataDump::deserialize(&mut get_read_handle(
&opts,
)?)?
};

let mut output = get_output_handle(&opts)?;

let json_data = converter::convert_from_data_dump(
&data_dump,
opts.use_quotes,
@@ -134,6 +202,51 @@ fn main() -> Result<(), CiderDataConverterError> {
serde_json::to_string_pretty(&json_data)?
)?;
}
Target::Dat => {
let mut input = get_read_handle(&opts)?;
let parsed_json: JsonData =
serde_json::from_reader(&mut input)?;
let data = converter::convert_to_data_dump(
&parsed_json,
opts.round_float,
);

if let Some(path) = opts.output_path {
if path.exists() && !path.is_dir() {
// TODO griffin: Make this an actual error
panic!("Output path exists but is not a directory")
} else if !path.exists() {
std::fs::create_dir(&path)?;
}

let mut header_output = File::create(path.join("header"))?;
header_output.write_all(&data.header.serialize()?)?;

for memory in &data.header.memories {
let file = File::create(path.join(&memory.name))?;
let mut writer = BufWriter::new(file);
for bytes in data
.get_data(&memory.name)
.unwrap()
.chunks_exact(memory.bytes_per_entry() as usize)
{
// data file seems to expect lsb on the right
// for the moment electing to print out every byte
// and do so with two hex digits per byte rather
// than truncating leading zeroes. No need to do
// anything fancy here.
for byte in bytes.iter().rev() {
write!(writer, "{byte:02X}")?;
}

writeln!(writer)?;
}
}
} else {
// TODO griffin: Make this an actual error
panic!("Output path not specified, this is required for the dat target")
}
}
}
} else {
// Since we can't guess based on input/output file names and no target
@@ -143,3 +256,34 @@ fn main() -> Result<(), CiderDataConverterError> {

Ok(())
}

#[allow(clippy::type_complexity)]
fn get_io_handles(
opts: &Opts,
) -> Result<(Box<dyn Read>, Box<dyn Write>), CiderDataConverterError> {
let input = get_read_handle(opts)?;
let output = get_output_handle(opts)?;
Ok((input, output))
}

fn get_output_handle(
opts: &Opts,
) -> Result<Box<dyn Write>, CiderDataConverterError> {
let output: Box<dyn Write> = opts
.output_path
.as_ref()
.map(|path| File::create(path).map(|x| Box::new(x) as Box<dyn Write>))
.unwrap_or(Ok(Box::new(io::stdout())))?;
Ok(output)
}

fn get_read_handle(
opts: &Opts,
) -> Result<Box<dyn Read>, CiderDataConverterError> {
let input: Box<dyn Read> = opts
.input_path
.as_ref()
.map(|path| File::open(path).map(|x| Box::new(x) as Box<dyn Read>))
.unwrap_or(Ok(Box::new(io::stdin())))?;
Ok(input)
}
