From d6059df9c2876d577eff129b66b73f6a21547b32 Mon Sep 17 00:00:00 2001 From: sslivkoff Date: Sun, 13 Aug 2023 12:08:36 -0700 Subject: [PATCH] save report file every time cryo is run --- Cargo.lock | 58 +++++++++++--------- crates/cli/Cargo.toml | 3 + crates/cli/build.rs | 22 ++++++++ crates/cli/src/args.rs | 15 +++-- crates/cli/src/lib.rs | 1 + crates/cli/src/main.rs | 1 + crates/cli/src/reports.rs | 61 +++++++++++++++++++++ crates/cli/src/run.rs | 21 ++++++- crates/cli/src/summaries.rs | 5 ++ crates/freeze/Cargo.toml | 2 + crates/freeze/src/types/datatypes/scalar.rs | 2 +- crates/freeze/src/types/errors.rs | 12 ++++ crates/freeze/src/types/summaries.rs | 1 + crates/python/src/collect_adapter.rs | 6 ++ crates/python/src/freeze_adapter.rs | 6 ++ 15 files changed, 183 insertions(+), 33 deletions(-) create mode 100644 crates/cli/build.rs create mode 100644 crates/cli/src/reports.rs diff --git a/Cargo.lock b/Cargo.lock index 30cbfe27..576fd5d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -230,7 +230,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -241,7 +241,7 @@ checksum = "79fa67157abdfd688a259b6648808757db9347af834624f27ec646da976aee5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -462,7 +462,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -605,7 +605,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -859,6 +859,8 @@ dependencies = [ "hex", "indicatif", "polars", + "serde", + "serde_json", "thousands", "tokio", ] @@ -876,6 +878,8 @@ dependencies = [ "indicatif", "polars", "prefix-hex", + "serde", + "serde_json", "thiserror", "tokio", ] @@ -1281,7 +1285,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "syn 2.0.23", + 
"syn 2.0.28", "toml", "walkdir", ] @@ -1299,7 +1303,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -1325,7 +1329,7 @@ dependencies = [ "serde", "serde_json", "strum 0.25.0", - "syn 2.0.23", + "syn 2.0.28", "tempfile", "thiserror", "tiny-keccak", @@ -1649,7 +1653,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -2527,7 +2531,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -2779,7 +2783,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -2817,7 +2821,7 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -3154,7 +3158,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92139198957b410250d43fad93e630d956499a625c527eda65175c8680f83387" dependencies = [ "proc-macro2", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -3759,29 +3763,29 @@ checksum = "63134939175b3131fe4d2c131b103fd42f25ccca89423d43b5e4f267920ccf03" [[package]] name = "serde" -version = "1.0.166" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.166" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] 
name = "serde_json" -version = "1.0.100" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa", "ryu", @@ -4087,7 +4091,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -4129,9 +4133,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.23" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -4212,7 +4216,7 @@ checksum = "d14928354b01c4d6a4f0e549069adef399a284e7995c7ccca94e8a07a5346c59" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -4298,7 +4302,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -4400,7 +4404,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", ] [[package]] @@ -4615,7 +4619,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", "wasm-bindgen-shared", ] @@ -4649,7 +4653,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.28", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index e5a8bb46..a20ddee9 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -1,5 +1,6 @@ [package] name = "cryo_cli" +build = "build.rs" description = "cryo is the easiest way to extract blockchain 
data to parquet, csv, or json" version.workspace = true edition.workspace = true @@ -32,3 +33,5 @@ chrono = "0.4.26" anstyle = "1.0.1" eyre = "0.6.8" governor = "0.5.1" +serde = { version = "1.0.183", features = ["derive"] } +serde_json = "1.0.104" diff --git a/crates/cli/build.rs b/crates/cli/build.rs new file mode 100644 index 00000000..b51382a9 --- /dev/null +++ b/crates/cli/build.rs @@ -0,0 +1,22 @@ +use std::process::Command; + +fn main() { + // Run `git describe --tags --always` + let output = + Command::new("git").args(["describe", "--tags", "--always"]).output().unwrap_or_else(|e| { + panic!("Failed to execute git command: {}", e); + }); + + if output.status.success() { + let git_description = String::from_utf8(output.stdout) + .unwrap_or_else(|e| { + panic!("Failed to read git command output: {}", e); + }) + .trim() + .to_string(); + + println!("cargo:rustc-env=GIT_DESCRIPTION={}", git_description); + } else { + println!("cargo:warning=Could not determine git description"); + } +} diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index 24090585..98cfe735 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -1,8 +1,9 @@ use clap_cryo::Parser; use color_print::cstr; +use serde::{Deserialize, Serialize}; /// Command line arguments -#[derive(Parser, Debug)] +#[derive(Parser, Debug, Serialize, Deserialize)] #[command(name = "cryo", author, version, about = get_about_str(), long_about = None, styles=get_styles(), after_help=get_after_str(), allow_negative_numbers = true)] pub struct Args { /// datatype to collect @@ -132,9 +133,15 @@ pub struct Args { #[arg(long, help_heading="Output Options", value_name="NAME [#]", num_args(1..=2), default_value = "lz4")] pub compression: Vec, - // /// [transactions] track gas used by each transaction - // #[arg(long, help_heading = "Dataset-specific Options")] - // pub gas_used: bool, + /// Directory to save summary report + /// [default: {output_dir}/.cryo_reports] + #[arg(long, help_heading = 
"Output Options")] + pub report_dir: Option, + + /// Avoid saving a summary report + #[arg(long, help_heading = "Output Options")] + pub no_report: bool, + /// [logs] filter logs by contract address #[arg(long, help_heading = "Dataset-specific Options")] pub contract: Option, diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index c3f32cb4..8a6686b4 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -9,6 +9,7 @@ mod args; mod parse; +mod reports; mod run; mod summaries; diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index eeeb7c14..d7ab8e71 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -4,6 +4,7 @@ use clap_cryo::Parser; mod args; mod parse; +mod reports; mod run; mod summaries; diff --git a/crates/cli/src/reports.rs b/crates/cli/src/reports.rs new file mode 100644 index 00000000..dcf1121c --- /dev/null +++ b/crates/cli/src/reports.rs @@ -0,0 +1,61 @@ +use crate::args; +use chrono::{DateTime, Local}; +use cryo_freeze::{FreezeError, FreezeSummary}; +use std::{fs::File, io::Write, path::Path, time::SystemTime}; + +#[derive(serde::Serialize, Debug)] +struct FreezeReport<'a> { + cryo_version: String, + // node_client: String, + cli_command: Vec, + args: &'a args::Args, + chunk_summary: Option<&'a FreezeSummary>, +} + +pub(crate) fn get_report_path( + args: &args::Args, + t_start: SystemTime, + is_complete: bool, +) -> Result { + let report_dir = match &args.report_dir { + Some(report_dir) => Path::new(&report_dir).into(), + None => Path::new(&args.output_dir).join(".cryo_reports"), + }; + std::fs::create_dir_all(&report_dir)?; + let t_start: DateTime = t_start.into(); + let timestamp: String = t_start.format("%Y-%m-%d_%H-%M-%S").to_string(); + let filename = if is_complete { + timestamp + ".json" + } else { + format!("incomplete_{}", timestamp + ".json") + }; + let path = report_dir.join(filename); + path.to_str() + .ok_or(FreezeError::GeneralError("non-String path".to_string())) + .map(|s| s.to_string()) +} + 
+pub(crate) fn write_report( + args: &args::Args, + freeze_summary: Option<&FreezeSummary>, + t_start: SystemTime, +) -> Result { + // determine version + let cryo_version = format!("{}__{}", env!("CARGO_PKG_VERSION"), env!("GIT_DESCRIPTION")); + let report = FreezeReport { + cryo_version, + cli_command: std::env::args().collect(), + args, + chunk_summary: freeze_summary, + }; + let serialized = serde_json::to_string(&report)?; + + // create path + let path = get_report_path(args, t_start, freeze_summary.is_some())?; + + // save to file + let mut file = File::create(&path)?; + file.write_all(serialized.as_bytes())?; + + Ok(path) +} diff --git a/crates/cli/src/run.rs b/crates/cli/src/run.rs index 8b3f93e7..cf91bd08 100644 --- a/crates/cli/src/run.rs +++ b/crates/cli/src/run.rs @@ -18,7 +18,13 @@ pub async fn run(args: args::Args) -> Result, FreezeError> // print summary if !args.no_verbose { - summaries::print_cryo_summary(&query, &source, &sink, n_chunks_remaining); + let report_path = if !args.no_report && n_chunks_remaining > 0 { + let report_path = crate::reports::get_report_path(&args, t_start, true)?; + Some(report_path.strip_prefix("./").unwrap_or(&report_path).to_string()) + } else { + None + }; + summaries::print_cryo_summary(&query, &source, &sink, n_chunks_remaining, report_path); } // check dry run @@ -29,6 +35,11 @@ pub async fn run(args: args::Args) -> Result, FreezeError> return Ok(None) }; + // create initial report + if !args.no_report && n_chunks_remaining > 0 { + crate::reports::write_report(&args, None, t_start)?; + }; + // create progress bar let bar = Arc::new(ProgressBar::new(n_chunks_remaining)); bar.set_style( @@ -56,6 +67,14 @@ pub async fn run(args: args::Args) -> Result, FreezeError> ) } + let n_attempts = freeze_summary.n_completed + freeze_summary.n_errored; + if !args.no_report && n_attempts > 0 { + crate::reports::write_report(&args, Some(&freeze_summary), t_start)?; + let incomplete_report_path = + 
crate::reports::get_report_path(&args, t_start, false)?; + std::fs::remove_file(incomplete_report_path)?; + }; + // return summary Ok(Some(freeze_summary)) } diff --git a/crates/cli/src/summaries.rs b/crates/cli/src/summaries.rs index 988ddeb6..7bd496ec 100644 --- a/crates/cli/src/summaries.rs +++ b/crates/cli/src/summaries.rs @@ -34,6 +34,7 @@ pub(crate) fn print_cryo_summary( source: &Source, sink: &FileOutput, n_chunks_remaining: u64, + report_path: Option, ) { print_header("cryo parameters"); let datatype_strs: Vec<_> = query.schemas.keys().map(|d| d.dataset().name()).collect(); @@ -57,6 +58,10 @@ pub(crate) fn print_cryo_summary( }; print_bullet("output format", sink.format.as_str()); print_bullet("output dir", &sink.output_dir); + match report_path { + None => print_bullet("report file", "None"), + Some(path) => print_bullet("report file", path), + }; print_schemas(&query.schemas); } diff --git a/crates/freeze/Cargo.toml b/crates/freeze/Cargo.toml index 5da17019..164695c4 100644 --- a/crates/freeze/Cargo.toml +++ b/crates/freeze/Cargo.toml @@ -19,6 +19,8 @@ indexmap = "2.0.0" indicatif = "0.17.5" polars = { version = "0.30.0", features = ["parquet", "string_encoding", "polars-lazy", "lazy", "binary_encoding", "json", "dtype-struct"] } prefix-hex = "0.7.0" +serde = { version = "1.0.183", features = ["derive"] } +serde_json = "1.0.104" thiserror = "1.0.40" tokio = { version = "1.28.2", features = ["macros", "rt-multi-thread", "sync"] } diff --git a/crates/freeze/src/types/datatypes/scalar.rs b/crates/freeze/src/types/datatypes/scalar.rs index dd0e0248..f63f9af1 100644 --- a/crates/freeze/src/types/datatypes/scalar.rs +++ b/crates/freeze/src/types/datatypes/scalar.rs @@ -32,7 +32,7 @@ pub struct NativeTransfers; pub struct Contracts; /// enum of possible datatypes that cryo can collect -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize)] pub enum Datatype { /// Balance Diffs BalanceDiffs, diff 
--git a/crates/freeze/src/types/errors.rs b/crates/freeze/src/types/errors.rs index 06760862..64516d3c 100644 --- a/crates/freeze/src/types/errors.rs +++ b/crates/freeze/src/types/errors.rs @@ -25,6 +25,18 @@ pub enum FreezeError { /// Parse error #[error("Parsing error")] ParseError(#[from] ParseError), + + /// Error from serializing report + #[error("JSON error")] + ReportSerializeError(#[from] serde_json::Error), + + /// I/O error from creating or writing the report file + #[error("File creation error")] + ReportFileCreationError(#[from] std::io::Error), + + /// General Error + #[error("{0}")] + GeneralError(String), } /// Error related to data collection diff --git a/crates/freeze/src/types/summaries.rs b/crates/freeze/src/types/summaries.rs index de6f728e..5f8c72d8 100644 --- a/crates/freeze/src/types/summaries.rs +++ b/crates/freeze/src/types/summaries.rs @@ -2,6 +2,7 @@ use crate::types::Datatype; use std::collections::HashMap; /// Summary of freeze operation +#[derive(serde::Serialize, Debug)] pub struct FreezeSummary { /// number of chunks completed successfully pub n_completed: u64, diff --git a/crates/python/src/collect_adapter.rs b/crates/python/src/collect_adapter.rs index 35ae5729..a09176dd 100644 --- a/crates/python/src/collect_adapter.rs +++ b/crates/python/src/collect_adapter.rs @@ -35,6 +35,8 @@ use cryo_freeze::collect; n_row_groups = None, no_stats = false, compression = vec!["lz4".to_string()], + report_dir = None, + no_report = false, contract = None, topic0 = None, topic1 = None, @@ -74,6 +76,8 @@ pub fn _collect( n_row_groups: Option, no_stats: bool, compression: Vec, + report_dir: Option, + no_report: bool, contract: Option, topic0: Option, topic1: Option, @@ -110,6 +114,8 @@ pub fn _collect( n_row_groups, no_stats, compression, + report_dir, + no_report, contract, topic0, topic1, diff --git a/crates/python/src/freeze_adapter.rs b/crates/python/src/freeze_adapter.rs index 146e4418..0d6d88d3 100644 --- a/crates/python/src/freeze_adapter.rs +++ 
b/crates/python/src/freeze_adapter.rs @@ -36,6 +36,8 @@ use cryo_cli::{run, Args}; n_row_groups = None, no_stats = false, compression = vec!["lz4".to_string()], + report_dir = None, + no_report = false, contract = None, topic0 = None, topic1 = None, @@ -75,6 +77,8 @@ pub fn _freeze( n_row_groups: Option, no_stats: bool, compression: Vec, + report_dir: Option, + no_report: bool, contract: Option, topic0: Option, topic1: Option, @@ -111,6 +115,8 @@ pub fn _freeze( n_row_groups, no_stats, compression, + report_dir, + no_report, contract, topic0, topic1,