diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/.gitignore b/catalyst-gateway/crates/catalyst-chain-syncer/.gitignore new file mode 100644 index 00000000000..ea8c4bf7f35 --- /dev/null +++ b/catalyst-gateway/crates/catalyst-chain-syncer/.gitignore @@ -0,0 +1 @@ +/target diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/Cargo.toml new file mode 100644 index 00000000000..5af0a30eb52 --- /dev/null +++ b/catalyst-gateway/crates/catalyst-chain-syncer/Cargo.toml @@ -0,0 +1,31 @@ +[workspace] +resolver = "2" + +members = [ + "bin/catalyst-chain-syncer", + "crates/cardano-immutabledb-reader", + "crates/cardano-immutabledb-reader/testbench", + "crates/catalyst-chaindata-types", + "crates/catalyst-chaindata-writer", + "crates/postgres-util", +] + +[workspace.dependencies] +postgres-util = { path = "crates/postgres-util" } +cardano-immutabledb-reader = { path = "crates/cardano-immutabledb-reader" } +catalyst-chaindata-types = { path = "crates/catalyst-chaindata-types" } +catalyst-chaindata-writer = { path = "crates/catalyst-chaindata-writer" } + +anyhow = "1.0.83" +chrono = "0.4.38" +lazy_static = "1.4.0" +pallas-addresses = "0.25.0" +pallas-codec = "0.25.0" +pallas-traverse = "0.25.0" +serde = { version = "1.0.202", default-features = false } +serde_json = "1.0.117" +tokio = { version = "1.37.0", default-features = false } + +[profile.release] +# Should be on just for profiling +debug = true diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/LICENSE b/catalyst-gateway/crates/catalyst-chain-syncer/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/catalyst-gateway/crates/catalyst-chain-syncer/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/README.md b/catalyst-gateway/crates/catalyst-chain-syncer/README.md
new file mode 100644
index 00000000000..9cbc57565c3
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/README.md
@@ -0,0 +1 @@
+# catalyst-chain-syncer
\ No newline at end of file
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/Cargo.toml
new file mode 100644
index 00000000000..1e29811fc1e
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "catalyst-chain-syncer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+cardano-immutabledb-reader = { workspace = true }
+catalyst-chaindata-types = { workspace = true }
+catalyst-chaindata-writer = { workspace = true }
+postgres-util = { workspace = true }
+
+clap = { version = "4.5.4", default-features = false, features = ["derive", "help", "usage", "std"] }
+memory-stats = "1.1.0"
+pallas-traverse = { workspace = true }
+parse-size = "1.0.0"
+tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal", "sync"] }
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/src/main.rs b/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/src/main.rs
new file mode 100644
index 00000000000..17885f50dfe
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/bin/catalyst-chain-syncer/src/main.rs
@@ -0,0 +1,275 @@
+use std::{
+    error::Error,
+    path::PathBuf,
+    sync::{
+        atomic::{AtomicU64, Ordering::Acquire},
+        Arc,
+    },
+    time::{Duration, Instant},
+};
+
+use cardano_immutabledb_reader::block_reader::{BlockReader, BlockReaderConfig};
+use catalyst_chaindata_types::{
+    CardanoBlock, CardanoSpentTxo, CardanoTransaction, CardanoTxo, Network,
+};
+use catalyst_chaindata_writer::{
+    writers::postgres::PostgresWriter, ChainDataWriter, ChainDataWriterHandle, WriteData,
+};
+use clap::Parser;
+use pallas_traverse::MultiEraBlock;
+use tokio::sync::{mpsc, OwnedSemaphorePermit};
+
+fn parse_byte_size(s: &str) -> Result<u64, String> {
+    parse_size::parse_size(s).map_err(|e| e.to_string())
+}
+
+#[derive(Parser)]
+struct Cli {
+    #[clap(long)]
+    immutabledb_path: PathBuf,
+    #[clap(long)]
+    network: Network,
+
+    #[clap(long)]
+    database_url: String,
+
+    #[clap(long, default_value_t = 1)]
+    read_workers_count: usize,
+    #[clap(long, value_parser = parse_byte_size, default_value = "128MiB")]
+    read_worker_buffer_size: u64,
+    #[clap(long, value_parser = parse_byte_size, default_value = "256MiB")]
+    unprocessed_data_buffer_size: u64,
+    #[clap(long, default_value_t = 1)]
+    processing_workers_count: usize,
+    #[clap(long, value_parser = parse_byte_size, default_value = "256MiB")]
+    write_worker_buffer_size: u64,
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn Error>> {
+    let cli = Cli::parse();
+
+    postgres_util::create_tables_if_not_present(&cli.database_url).await?;
+    let pg_writer = PostgresWriter::open(&cli.database_url).await?;
+
+    let (writer, writer_handle) =
+        ChainDataWriter::new(pg_writer, cli.write_worker_buffer_size as usize).await?;
+
+    // Stats
+    let latest_block_number = Arc::new(AtomicU64::new(0));
+    let latest_slot_number = Arc::new(AtomicU64::new(0));
+    let read_byte_count = Arc::new(AtomicU64::new(0));
+    let processed_byte_count = Arc::new(AtomicU64::new(0));
+
+    let mut processing_workers_txs = Vec::new();
+
+    for _ in 
0..cli.processing_workers_count { + let (block_data_tx, block_data_rx) = mpsc::unbounded_channel::<(_, Vec<_>)>(); + processing_workers_txs.push(block_data_tx); + + tokio::task::spawn(process_block_bytes( + block_data_rx, + cli.network, + writer_handle.clone(), + read_byte_count.clone(), + processed_byte_count.clone(), + latest_block_number.clone(), + latest_slot_number.clone(), + )); + } + + // Drop extra writer handle since all workers have a reference to that now + drop(writer_handle); + + let reader_config = BlockReaderConfig { + worker_read_buffer_byte_size: cli.read_worker_buffer_size as usize, + read_worker_count: cli.read_workers_count, + unprocessed_data_buffer_byte_size: cli.unprocessed_data_buffer_size as usize, + }; + + let mut t = Instant::now(); + println!("Checking synced chain data..."); + + let (missing_data_ranges, latest_slot) = { + let conn = postgres_util::connection::Connection::open(&cli.database_url).await?; + + let rows = conn + .client() + .query( + include_str!("../../../crates/postgres-util/sql/find_missing_data.sql"), + &[], + ) + .await?; + + let mut missing_data_ranges = Vec::new(); + for row in rows { + let start_slot_no = row.get::<_, i64>(0) as u64; + let end_slot_no = row.get::<_, i64>(1) as u64; + + missing_data_ranges.push((start_slot_no + 1)..end_slot_no); + } + + let row = conn + .client() + .query_opt( + include_str!("../../../crates/postgres-util/sql/latest_slot.sql"), + &[], + ) + .await?; + + let latest_slot = match row { + Some(row) => row.get::<_, i64>(0) as u64, + None => 0, + }; + + conn.close().await?; + + (missing_data_ranges, latest_slot) + }; + + if !missing_data_ranges.is_empty() { + println!("Found missing data ranges. Recovering."); + + for r in missing_data_ranges { + println!("Recovering range {r:?}"); + let mut reader = + BlockReader::new(cli.immutabledb_path.clone(), &reader_config, r).await?; + + let mut i = 0; + while let Some(v) = reader.next().await { + processing_workers_txs[i].send(v).expect("Worker"); + i = (i + 1) % cli.processing_workers_count; + } + } + } + + println!("Done (took {:?})", t.elapsed()); + + t = Instant::now(); + let mut reader = + BlockReader::new(cli.immutabledb_path, &reader_config, (latest_slot + 1)..).await?; + + let mut ticker = tokio::time::interval(Duration::from_secs(1)); + let mut i = 0; + let mut create_indexes = true; + + loop { + tokio::select! 
{
+            _ = tokio::signal::ctrl_c() => {
+                println!("Stopping block readers and processing workers...");
+
+                // TODO: Do this in a better way
+                drop(reader);
+                drop(processing_workers_txs);
+
+                create_indexes = false;
+
+                break;
+            }
+
+            res = reader.next() => {
+                match res {
+                    Some(v) => {
+                        processing_workers_txs[i].send(v).expect("Worker");
+                        i = (i + 1) % cli.processing_workers_count;
+                    }
+                    None => {
+                        // TODO: Do this in a better way
+                        drop(reader);
+                        drop(processing_workers_txs);
+
+                        break;
+                    }
+                }
+            }
+
+            _ = ticker.tick() => {
+                let mem_usage = memory_stats::memory_stats().map(|s| s.physical_mem).unwrap_or(0);
+
+                println!(
+                    "BLOCK {} | SLOT {} | READ {} MB/s | PROCESSED {} MB/s | MEMORY USAGE {} MB",
+                    latest_block_number.load(Acquire),
+                    latest_slot_number.load(Acquire),
+                    read_byte_count.swap(0, Acquire) / (1024 * 1024),
+                    processed_byte_count.swap(0, Acquire) / (1024 * 1024),
+                    mem_usage / (1024 * 1024),
+                );
+            }
+        }
+    }
+
+    // Wait for the chain data writer to flush all data to postgres
+    println!("Waiting for the writer...");
+    writer.await?;
+    println!("Finished syncing immutabledb data ({:?})", t.elapsed());
+
+    if create_indexes {
+        println!("Creating indexes...");
+        t = Instant::now();
+        tokio::select! {
+            _ = tokio::signal::ctrl_c() => {
+                println!("Exiting");
+            }
+
+            res = postgres_util::create_indexes(&cli.database_url) => {
+                res?;
+                println!("Done (took {:?})", t.elapsed());
+            }
+        }
+    }
+
+    Ok(())
+}
+
+async fn process_block_bytes(
+    mut block_data_rx: mpsc::UnboundedReceiver<(OwnedSemaphorePermit, Vec<u8>)>,
+    network: Network,
+    writer_handle: ChainDataWriterHandle,
+    read_byte_count: Arc<AtomicU64>,
+    processed_byte_count: Arc<AtomicU64>,
+    latest_block_number: Arc<AtomicU64>,
+    latest_slot_number: Arc<AtomicU64>,
+) {
+    while let Some((_permit, block_bytes)) = block_data_rx.recv().await {
+        read_byte_count.fetch_add(block_bytes.len() as u64, Acquire);
+
+        let block = MultiEraBlock::decode(&block_bytes).expect("Decode");
+
+        let Ok(block_data) = CardanoBlock::from_block(&block, network) else {
+            eprintln!("Failed to parse block");
+            continue;
+        };
+
+        let Ok(transaction_data) = CardanoTransaction::many_from_block(&block, network) else {
+            eprintln!("Failed to parse transactions");
+            continue;
+        };
+
+        let txs = block.txs();
+
+        let Ok(transaction_outputs_data) = CardanoTxo::from_transactions(&txs) else {
+            eprintln!("Failed to parse TXOs");
+            continue;
+        };
+
+        let Ok(spent_transaction_outputs_data) = CardanoSpentTxo::from_transactions(&txs) else {
+            eprintln!("Failed to parse spent TXOs");
+            continue;
+        };
+
+        let write_data = WriteData {
+            block: block_data,
+            transactions: transaction_data,
+            transaction_outputs: transaction_outputs_data,
+            spent_transaction_outputs: spent_transaction_outputs_data,
+        };
+
+        // Update stats
+        processed_byte_count.fetch_add(write_data.byte_size() as u64, Acquire);
+        latest_block_number.fetch_max(block.number(), Acquire);
+        latest_slot_number.fetch_max(block.slot(), Acquire);
+        drop(block_bytes);
+
+        writer_handle.write(write_data).await.expect("Write");
+    }
+}
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/Cargo.toml
new file mode 100644
index 00000000000..33cb200c1ae
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "cardano-immutabledb-reader"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+binary-layout = "4.0.2"
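+# binary-layout above describes the fixed-size on-disk secondary index entries (see src/lib.rs)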
+pallas-codec = { workspace = true }
+pallas-traverse = { workspace = true }
+tokio = { workspace = true, features = ["fs", "macros", "io-util", "sync", "rt"] }
+tokio-util = "0.7.11"
+
+[dev-dependencies]
+pallas-traverse = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt"] }
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/block_reader.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/block_reader.rs
new file mode 100644
index 00000000000..bbedaefc2ff
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/block_reader.rs
@@ -0,0 +1,250 @@
+use std::{
+    collections::BTreeSet,
+    ops::{Bound, RangeBounds},
+    os::unix::fs::MetadataExt,
+    path::Path,
+    sync::Arc,
+};
+
+use tokio::sync::{mpsc, OwnedSemaphorePermit, Semaphore};
+use tokio_util::sync::CancellationToken;
+
+use crate::{dir_chunk_numbers, slot_chunk_number};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct BlockReaderConfig {
+    pub worker_read_buffer_byte_size: usize,
+    pub read_worker_count: usize,
+    pub unprocessed_data_buffer_byte_size: usize,
+}
+
+impl Default for BlockReaderConfig {
+    fn default() -> Self {
+        Self {
+            worker_read_buffer_byte_size: 8 * 1024 * 1024, // 8MB
+            read_worker_count: 1,
+            unprocessed_data_buffer_byte_size: 8 * 1024 * 1024, // 8MB
+        }
+    }
+}
+
+pub struct BlockReader {
+    _cancellation_token: CancellationToken,
+    read_data_rx: mpsc::UnboundedReceiver<(OwnedSemaphorePermit, Vec<u8>)>,
+}
+
+impl BlockReader {
+    pub async fn new<P, R>(
+        immutabledb_path: P,
+        config: &BlockReaderConfig,
+        slot_range: R,
+    ) -> anyhow::Result<Self>
+    where
+        P: AsRef<Path>,
+        R: RangeBounds<u64> + Clone + Send + 'static,
+    {
+        let task_chunk_numbers = {
+            let min_slot_bound = match slot_range.start_bound().cloned() {
+                Bound::Included(s) => s,
+                Bound::Excluded(s) => s + 1,
+                Bound::Unbounded => 0,
+            };
+            let max_slot_bound = match slot_range.end_bound().cloned() {
+                Bound::Included(s) => Some(s),
+                Bound::Excluded(s) => Some(s - 1),
+                Bound::Unbounded => None,
+            };
+
+            let min_chunk_number = slot_chunk_number(min_slot_bound);
+            let max_chunk_number = max_slot_bound.map(slot_chunk_number);
+
+            let mut split_chunk_numbers = vec![(0u64, BTreeSet::new()); config.read_worker_count];
+
+            let chunk_numbers = dir_chunk_numbers(immutabledb_path.as_ref()).await?;
+
+            for chunk_number in chunk_numbers {
+                if chunk_number < min_chunk_number
+                    || max_chunk_number
+                        .as_ref()
+                        .copied()
+                        .map(|s| chunk_number > s)
+                        .unwrap_or(false)
+                {
+                    continue;
+                }
+
+                let f = tokio::fs::File::open(
+                    immutabledb_path
+                        .as_ref()
+                        .join(format!("{chunk_number:05}.chunk")),
+                )
+                .await?;
+                let f_metadata = f.metadata().await?;
+
+                // Assign each chunk file to the worker with the smallest total
+                // byte count so far, to balance read work across workers.
+                let min = split_chunk_numbers
+                    .iter_mut()
+                    .min_by_key(|(byte_count, _)| *byte_count)
+                    .expect("At least one entry");
+
+                min.0 += f_metadata.size() as u64;
+                min.1.insert(chunk_number);
+            }
+
+            split_chunk_numbers
+        };
+
+        let cancellation_token = CancellationToken::new();
+        let (read_data_tx, read_data_rx) = mpsc::unbounded_channel();
+        let read_semaphore = Arc::new(Semaphore::new(config.unprocessed_data_buffer_byte_size));
+
+        for (_, chunk_numbers) in task_chunk_numbers {
+            // Allocate a read buffer for each reader task
+            let read_buffer = vec![0u8; config.worker_read_buffer_byte_size];
+
+            tokio::spawn(chunk_reader_task::start(
+                immutabledb_path.as_ref().to_path_buf(),
+                read_buffer,
+                chunk_numbers,
+                slot_range.clone(),
+                read_semaphore.clone(),
+                read_data_tx.clone(),
+                cancellation_token.child_token(),
+            ));
+        }
+
+        Ok(Self {
+            _cancellation_token: cancellation_token,
+            read_data_rx,
+        })
+    }
+
+    pub async fn next(&mut self) -> Option<(OwnedSemaphorePermit, Vec<u8>)> {
+        self.read_data_rx.recv().await
+    }
+}
+
+mod chunk_reader_task {
+    use std::{collections::BTreeSet, ops::RangeBounds, path::PathBuf, sync::Arc};
+
+    use tokio::sync::{mpsc, OwnedSemaphorePermit, Semaphore};
+    use tokio_util::sync::CancellationToken;
+
+    use crate::read_chunk_file;
+
+    pub async fn start<R>(
+        immutabledb_path: PathBuf,
+        mut read_buffer: Vec<u8>,
+        mut chunk_numbers: BTreeSet<u32>,
+        slot_range: R,
+        read_semaphore: Arc<Semaphore>,
+        read_data_tx: mpsc::UnboundedSender<(OwnedSemaphorePermit, Vec<u8>)>,
+        cancellation_token: CancellationToken,
+    ) where
+        R: RangeBounds<u64>,
+    {
+        'main_loop: while let Some(chunk_number) = chunk_numbers.pop_first() {
+            let chunk_number_str = format!("{chunk_number:05}");
+
+            let chunk_file_path = immutabledb_path.join(format!("{chunk_number_str}.chunk"));
+            let secondary_file_path =
+                immutabledb_path.join(format!("{chunk_number_str}.secondary"));
+
+            let mut chunk_iter =
+                read_chunk_file(chunk_file_path, secondary_file_path, &mut read_buffer)
+                    .await
+                    .expect("read_chunk_file");
+
+            loop {
+                tokio::select! {
+                    _ = cancellation_token.cancelled() => {
+                        break;
+                    }
+
+                    res = chunk_iter.next() => {
+                        match res {
+                            Ok(Some((slot, block_data))) => {
+                                // Filter out slots outside the required range
+                                if !slot_range.contains(&slot) {
+                                    continue;
+                                }
+
+                                // Acquire one permit per byte so the total amount
+                                // of unprocessed block data stays bounded.
+                                let permit = read_semaphore
+                                    .clone()
+                                    .acquire_many_owned(block_data.len() as u32)
+                                    .await
+                                    .expect("Acquire");
+
+                                if read_data_tx.send((permit, block_data.to_vec())).is_err() {
+                                    break 'main_loop;
+                                }
+                            }
+                            Ok(None) => {
+                                break;
+                            }
+                            Err(e) => panic!("{}", e),
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::BTreeSet;
+
+    use pallas_traverse::MultiEraBlock;
+
+    use super::{BlockReader, BlockReaderConfig};
+
+    #[tokio::test]
+    async fn test_block_reader() {
+        let config = BlockReaderConfig::default();
+
+        let mut block_reader = BlockReader::new("tests_data", &config, ..)
+            .await
+            .expect("Block reader started");
+
+        let mut block_numbers = BTreeSet::new();
+
+        while let Some((_permit, block_data)) = block_reader.next().await {
+            let decoded_block_data = MultiEraBlock::decode(&block_data).expect("Decoded");
+            block_numbers.insert(decoded_block_data.number());
+        }
+
+        assert_eq!(block_numbers.len(), 9766);
+
+        let mut last_block_number = block_numbers.pop_first().expect("At least one block");
+        for block_number in block_numbers {
+            assert!(block_number == last_block_number + 1);
+            last_block_number = block_number;
+        }
+    }
+
+    #[tokio::test]
+    async fn test_block_reader_range() {
+        let config = BlockReaderConfig::default();
+
+        // Read from block 1000 to block 4999
+        let mut block_reader = BlockReader::new("tests_data", &config, 105480..185480)
+            .await
+            .expect("Block reader started");
+
+        let mut block_numbers = BTreeSet::new();
+
+        while let Some((_permit, block_data)) = block_reader.next().await {
+            let decoded_block_data = MultiEraBlock::decode(&block_data).expect("Decoded");
+            block_numbers.insert(decoded_block_data.number());
+        }
+
+        assert_eq!(block_numbers.first(), Some(&1000));
+        assert_eq!(block_numbers.last(), Some(&4999));
+
+        let mut last_block_number = block_numbers.pop_first().expect("At least one block");
+        for block_number in block_numbers {
+            assert!(block_number == last_block_number + 1);
+            last_block_number = block_number;
+        }
+    }
+}
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/lib.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/lib.rs
new file mode 100644
index 00000000000..a7ddfb14ec2
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/src/lib.rs
@@ -0,0 +1,286 @@
+pub mod block_reader;
+
+use std::{borrow::Cow, collections::BTreeSet, ffi::OsStr, path::Path};
+
+use binary_layout::binary_layout;
+use tokio::io::AsyncReadExt;
+
+pub async fn dir_chunk_numbers<P: AsRef<Path>>(path: P) -> anyhow::Result<BTreeSet<u32>> {
+    let mut dir = tokio::fs::read_dir(path).await?;
+    let mut chunk_numbers = BTreeSet::new();
+
+    while let Some(e) = dir.next_entry().await? {
+        let entry_path = e.path();
+
+        if let Some("chunk") = entry_path.extension().and_then(OsStr::to_str) {
+            let Some(stem) = entry_path.file_stem().and_then(OsStr::to_str) else {
+                continue;
+            };
+
+            chunk_numbers.insert(stem.parse()?);
+        }
+    }
+
+    Ok(chunk_numbers)
+}
+
+const CHUNK_SIZE: u32 = 21_600;
+
+pub fn slot_chunk_number(slot_no: u64) -> u32 {
+    (slot_no / (CHUNK_SIZE as u64)) as u32
+}
+
+binary_layout!(secondary_index_entry_layout, BigEndian, {
+    block_offset: u64,
+    header_offset: u16,
+    header_size: u16,
+    checksum: [u8; 4],
+    header_hash: [u8; 32],
+    block_or_ebb: u64,
+});
+
+const SECONDARY_INDEX_ENTRY_SIZE: usize = match secondary_index_entry_layout::SIZE {
+    Some(size) => size,
+    None => panic!("Expected secondary entry layout to have constant size"),
+};
+
+pub struct SecondaryIndexEntry {
+    block_offset: u64,
+}
+
+pub struct SecondaryIndex {
+    entries: Vec<SecondaryIndexEntry>,
+}
+
+impl SecondaryIndex {
+    pub async fn from_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
+        let mut secondary_index_file = tokio::fs::File::open(path).await?;
+
+        let mut entries = Vec::new();
+        let mut entry_buf = [0u8; SECONDARY_INDEX_ENTRY_SIZE];
+
+        loop {
+            // Maybe this should use the sync version of read_exact?
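+            // Each entry is a fixed-size record (see the layout above), so we
+            // read whole records until a clean EOF, which marks the end of the
+            // index.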
+            if let Err(err) = secondary_index_file.read_exact(&mut entry_buf).await {
+                if err.kind() == std::io::ErrorKind::UnexpectedEof {
+                    break;
+                }
+
+                return Err(err.into());
+            }
+
+            let view = secondary_index_entry_layout::View::new(&entry_buf);
+
+            entries.push(SecondaryIndexEntry {
+                block_offset: view.block_offset().read(),
+            });
+        }
+
+        Ok(SecondaryIndex { entries })
+    }
+}
+
+pub struct ReadChunkFile<'a> {
+    secondary_index: SecondaryIndex,
+    chunk_file_data: &'a mut Vec<u8>,
+    counter: usize,
+}
+
+impl<'a> ReadChunkFile<'a> {
+    async fn next(&mut self) -> anyhow::Result<Option<(u64, &[u8])>> {
+        let next_counter = self.counter + 1;
+
+        let (from, to) = match next_counter.cmp(&self.secondary_index.entries.len()) {
+            std::cmp::Ordering::Less => {
+                let from = self.secondary_index.entries[self.counter].block_offset as usize;
+                let to = self.secondary_index.entries[next_counter].block_offset as usize;
+
+                (from, to)
+            }
+            std::cmp::Ordering::Equal => {
+                // The last block spans from the final index entry to the end of
+                // the chunk file.
+                let from = self.secondary_index.entries[self.counter].block_offset as usize;
+                let to = self.chunk_file_data.len();
+
+                (from, to)
+            }
+            std::cmp::Ordering::Greater => {
+                return Ok(None);
+            }
+        };
+
+        match self.chunk_file_data.get(from..to) {
+            Some(block_bytes) => {
+                self.counter = next_counter;
+                let h: TestDecode = pallas_codec::minicbor::decode(block_bytes)?;
+
+                Ok(Some((h.0.slot(), block_bytes)))
+            }
+            None => Ok(None),
+        }
+    }
+}
+
+#[derive(Debug)]
+struct TestDecode<'b>(pallas_traverse::MultiEraHeader<'b>);
+
+impl<'b, C> pallas_codec::minicbor::Decode<'b, C> for TestDecode<'b> {
+    fn decode(
+        d: &mut pallas_codec::minicbor::Decoder<'b>,
+        _ctx: &mut C,
+    ) -> Result<Self, pallas_codec::minicbor::decode::Error> {
+        d.array()?;
+
+        let era = d.u16()?;
+
+        let _m = d.array()?;
+
+        let header = match era {
+            0 => {
+                let header = d.decode()?;
+                pallas_traverse::MultiEraHeader::EpochBoundary(Cow::Owned(header))
+            }
+            1 => {
+                let header = d.decode()?;
+                pallas_traverse::MultiEraHeader::Byron(Cow::Owned(header))
+            }
+            2 => {
+                let header = d.decode()?;
+                pallas_traverse::MultiEraHeader::AlonzoCompatible(Cow::Owned(header))
+            }
+            6 => {
+                let header = d.decode()?;
+                pallas_traverse::MultiEraHeader::Babbage(Cow::Owned(header))
+            }
+            3 | 4 | 5 | 7 => {
+                let header = d.decode()?;
+                pallas_traverse::MultiEraHeader::AlonzoCompatible(Cow::Owned(header))
+            }
+            _ => {
+                return Err(pallas_codec::minicbor::decode::Error::message(
+                    "Invalid CBOR",
+                ))
+            }
+        };
+
+        Ok(TestDecode(header))
+    }
+}
+
+use pallas_codec::minicbor::decode::{Token, Tokenizer};
+
+use pallas_traverse::Era;
+
+#[derive(Debug)]
+pub enum Outcome {
+    Matched(Era),
+    EpochBoundary,
+    Inconclusive,
+}
+
+// Executes a very lightweight inspection of the initial tokens of the CBOR
+// block payload to extract the tag of the block wrapper, which defines the era
+// of the contained bytes.
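+// For reference: each block payload is wrapped in a two-element CBOR array,
+// [era_variant, block_body], e.g. (assumed example) [6, ...] for a Babbage-era
+// block.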
+pub fn block_era(cbor: &[u8]) -> Outcome {
+    let mut tokenizer = Tokenizer::new(cbor);
+
+    if !matches!(tokenizer.next(), Some(Ok(Token::Array(2)))) {
+        return Outcome::Inconclusive;
+    }
+
+    match tokenizer.next() {
+        Some(Ok(Token::U8(variant))) => match variant {
+            0 => Outcome::EpochBoundary,
+            1 => Outcome::Matched(Era::Byron),
+            2 => Outcome::Matched(Era::Shelley),
+            3 => Outcome::Matched(Era::Allegra),
+            4 => Outcome::Matched(Era::Mary),
+            5 => Outcome::Matched(Era::Alonzo),
+            6 => Outcome::Matched(Era::Babbage),
+            7 => Outcome::Matched(Era::Conway),
+            _ => Outcome::Inconclusive,
+        },
+        _ => Outcome::Inconclusive,
+    }
+}
+
+pub async fn read_chunk_file<'a>(
+    path: impl AsRef<Path>,
+    secondary_index_path: impl AsRef<Path>,
+    data_buffer: &'a mut Vec<u8>,
+) -> anyhow::Result<ReadChunkFile<'a>> {
+    let mut chunk_file = tokio::fs::File::open(path).await?;
+
+    data_buffer.clear();
+    chunk_file.read_to_end(data_buffer).await?;
+
+    let secondary_index = SecondaryIndex::from_file(secondary_index_path).await?;
+
+    Ok(ReadChunkFile {
+        secondary_index,
+        chunk_file_data: data_buffer,
+        counter: 0,
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::BTreeSet;
+
+    use pallas_traverse::MultiEraBlock;
+
+    use crate::{dir_chunk_numbers, read_chunk_file, SecondaryIndex};
+
+    #[tokio::test]
+    async fn test_dir_chunk_numbers() {
+        let expected_chunk_numbers = (0..=12).collect::<BTreeSet<_>>();
+
+        let chunk_numbers = dir_chunk_numbers("tests_data")
+            .await
+            .expect("Successfully read chunk numbers");
+
+        assert_eq!(chunk_numbers, expected_chunk_numbers);
+    }
+
+    #[tokio::test]
+    async fn test_secondary_index_from_file() {
+        let index = SecondaryIndex::from_file("tests_data/00012.secondary")
+            .await
+            .expect("Successfully read secondary index file");
+
+        assert_eq!(index.entries.len(), 1080);
+    }
+
+    #[tokio::test]
+    async fn test_read_chunk_file() {
+        let mut buffer = Vec::new();
+
+        let mut iter = read_chunk_file(
+            "tests_data/00012.chunk",
+            "tests_data/00012.secondary",
+            &mut buffer,
+        )
+        .await
+        .expect("Chunk file iterator created");
+
+        let mut last_block_no = None;
+        let mut count = 0;
+        while let Some((_, data)) = iter
+            .next()
+            .await
+            .expect("Successfully read block data from chunk file")
+        {
+            // Make sure we are getting valid block data
+            let block = MultiEraBlock::decode(data).expect("Valid block");
+
+            if let Some(block_no) = last_block_no {
+                assert!(block.number() > block_no);
+                assert_eq!(block.number() - block_no, 1u64);
+            }
+
+            last_block_no = Some(block.number());
+            count += 1;
+        }
+
+        assert_eq!(count, 1080);
+    }
+}
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/Cargo.toml
new file mode 100644
index 00000000000..3f4f5b39930
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "catalyst-immutabledb-reader-testbench"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+cardano-immutabledb-reader = { workspace = true }
+clap = { version = "4.5.4", default-features = false, features = ["derive", "help", "usage", "std"] }
+pallas-traverse = "0.25.0"
+parse-size = "1.0.0"
+tokio = { workspace = true, features = ["rt-multi-thread", "macros", "time"] }
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/src/main.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/src/main.rs
new file mode 100644
index 00000000000..4cbb8244601
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/testbench/src/main.rs
@@ -0,0 +1,93 @@
+use std::{
+    path::PathBuf,
+    sync::{
+        atomic::{AtomicU64, Ordering::Acquire},
+        Arc,
+    },
+    time::Duration,
+};
+
+use cardano_immutabledb_reader::block_reader::{BlockReader, BlockReaderConfig};
+use clap::Parser;
+use pallas_traverse::MultiEraBlock;
+use tokio::sync::mpsc;
+
+fn parse_byte_size(s: &str) -> Result<u64, String> {
+    parse_size::parse_size(s).map_err(|e| e.to_string())
+}
+
+#[derive(Parser)]
+struct Cli {
+    #[clap(short, long)]
+    immutabledb_path: PathBuf,
+    #[clap(long, default_value_t = 1)]
+    read_worker_count: usize,
+    #[clap(long, value_parser = parse_byte_size, default_value = "128MiB")]
+    read_worker_buffer_size: u64,
+    #[clap(long, value_parser = parse_byte_size, default_value = "16MiB")]
+    unprocessed_data_buffer_size: u64,
+    #[clap(long, default_value_t = 1)]
+    processing_worker_count: usize,
+}
+
+#[tokio::main]
+async fn main() {
+    let cli_args = Cli::parse();
+
+    let config = BlockReaderConfig {
+        worker_read_buffer_byte_size: cli_args.read_worker_buffer_size as usize,
+        read_worker_count: cli_args.read_worker_count,
+        unprocessed_data_buffer_byte_size: cli_args.unprocessed_data_buffer_size as usize,
+    };
+
+    let mut block_reader = BlockReader::new(cli_args.immutabledb_path, &config, ..)
+        .await
+        .expect("Block reader started");
+
+    let byte_count = Arc::new(AtomicU64::new(0));
+    let block_number = Arc::new(AtomicU64::new(0));
+    let mut processing_workers_txs = Vec::new();
+
+    for _ in 0..cli_args.processing_worker_count {
+        let (worker_tx, mut worker_rx) = mpsc::unbounded_channel::<(_, Vec<_>)>();
+
+        tokio::spawn({
+            let worker_byte_count = byte_count.clone();
+            let worker_block_number = block_number.clone();
+
+            async move {
+                while let Some((_permit, block_data)) = worker_rx.recv().await {
+                    let decoded_block_data = MultiEraBlock::decode(&block_data).expect("Decoded");
+
+                    worker_byte_count.fetch_add(block_data.len() as u64, Acquire);
+                    worker_block_number.fetch_max(decoded_block_data.number(), Acquire);
+                }
+            }
+        });
+
+        processing_workers_txs.push(worker_tx);
+    }
+
+    let mut ticker = tokio::time::interval(Duration::from_secs(1));
+    let mut i = 0;
+
+    loop {
+        tokio::select! 
{ + res = block_reader.next() => { + match res { + Some(v) => { + processing_workers_txs[i].send(v).expect("Worker"); + i = (i + 1) % cli_args.processing_worker_count; + } + None => { + break; + } + } + } + + _ = ticker.tick() => { + println!("BLOCK NUMBER {} | {} MB/s", block_number.load(Acquire), byte_count.swap(0, std::sync::atomic::Ordering::Acquire) / 1024 / 1024); + } + } + } +} diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.chunk new file mode 100644 index 00000000000..877d9c385c4 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.primary new file mode 100644 index 00000000000..7da1ca236fe Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.secondary new file mode 100644 index 00000000000..f2c6b03b20f Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00000.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.chunk new file mode 100644 index 00000000000..4449630d0d9 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.primary new file mode 100644 index 00000000000..1faafcd384e Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.secondary new file mode 100644 index 00000000000..3c2c658f404 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00001.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.chunk new file mode 100644 index 00000000000..badab7124a5 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.primary new file mode 100644 
index 00000000000..e84df653ea3 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.secondary new file mode 100644 index 00000000000..c553eefe84f Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00002.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.chunk new file mode 100644 index 00000000000..6c3e6ea3be1 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.primary new file mode 100644 index 00000000000..d34bd721955 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.secondary new file mode 100644 index 00000000000..70d03c65cce Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00003.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.chunk new file mode 100644 index 00000000000..fe750395462 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.secondary new file mode 100644 index 00000000000..9574aca554e Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00004.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.chunk new file mode 100644 index 00000000000..32f0e0f7331 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.chunk differ diff --git 
a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.secondary new file mode 100644 index 00000000000..1c6fa910d86 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00005.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.chunk new file mode 100644 index 00000000000..7d4308a0089 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.secondary new file mode 100644 index 00000000000..296786c85ff Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00006.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.chunk new file mode 100644 index 00000000000..126e9f58e23 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.secondary new file mode 100644 index 00000000000..2165884ce25 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00007.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.chunk 
b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.chunk new file mode 100644 index 00000000000..a46e94d9d7f Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.secondary new file mode 100644 index 00000000000..c53b702566d Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00008.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.chunk new file mode 100644 index 00000000000..8d2603606ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.secondary new file mode 100644 index 00000000000..2698f1361b5 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00009.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.chunk new file mode 100644 index 00000000000..5facd1c3bc4 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.secondary new file mode 100644 index 00000000000..17f81d7074b Binary files /dev/null and 
b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00010.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.chunk new file mode 100644 index 00000000000..c945623364b Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.secondary new file mode 100644 index 00000000000..ed2b77bdd55 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00011.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.chunk b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.chunk new file mode 100644 index 00000000000..da087d9b341 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.chunk differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.primary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.primary new file mode 100644 index 00000000000..879a2b072ac Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.primary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.secondary b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.secondary new file mode 100644 index 00000000000..8923edffa34 Binary files /dev/null and b/catalyst-gateway/crates/catalyst-chain-syncer/crates/cardano-immutabledb-reader/tests_data/00012.secondary differ diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/Cargo.toml new file mode 100644 index 00000000000..8dfda70319a --- /dev/null +++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "catalyst-chaindata-types" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +chrono = { workspace = true } +lazy_static = { workspace = true } +pallas-addresses = { workspace = true } +pallas-traverse = { workspace = true } +serde_json = { workspace = true } +serde = { workspace = true, features = ["derive", "std"] } diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/lib.rs 
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/lib.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/lib.rs
new file mode 100644
index 00000000000..26c9c95c217
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/lib.rs
@@ -0,0 +1,214 @@
+pub mod serde_size;
+
+use std::str::FromStr;
+
+use chrono::{DateTime, Utc};
+use pallas_traverse::{
+    wellknown::GenesisValues, MultiEraAsset, MultiEraBlock, MultiEraPolicyAssets, MultiEraTx,
+};
+use serde::Serialize;
+
+use crate::serde_size::serde_size;
+
+lazy_static::lazy_static! {
+    static ref MAINNET_GENESIS_VALUES: GenesisValues = GenesisValues::mainnet();
+    static ref PREPROD_GENESIS_VALUES: GenesisValues = GenesisValues::preprod();
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct CardanoBlock {
+    pub block_no: u64,
+    pub slot_no: u64,
+    pub epoch_no: u64,
+    pub network: Network,
+    pub block_time: DateTime<Utc>,
+    pub block_hash: [u8; 32],
+    pub previous_hash: Option<[u8; 32]>,
+}
+
+impl CardanoBlock {
+    pub fn from_block(block: &MultiEraBlock, network: Network) -> anyhow::Result<Self> {
+        Ok(Self {
+            block_no: block.number(),
+            slot_no: block.slot(),
+            epoch_no: block.epoch(network.genesis_values()).0,
+            network,
+            block_time: DateTime::from_timestamp(
+                block.wallclock(network.genesis_values()) as i64,
+                0,
+            )
+            .ok_or_else(|| anyhow::anyhow!("Failed to parse DateTime from timestamp"))?,
+            block_hash: *block.hash(),
+            previous_hash: block.header().previous_hash().as_ref().map(|h| **h),
+        })
+    }
+}
+
+pub struct CardanoTransaction {
+    pub hash: [u8; 32],
+    pub block_no: u64,
+    pub network: Network,
+}
+
+impl CardanoTransaction {
+    pub fn many_from_block(block: &MultiEraBlock, network: Network) -> anyhow::Result<Vec<Self>> {
+        let data = block
+            .txs()
+            .into_iter()
+            .map(|tx| Self {
+                hash: *tx.hash(),
+                block_no: block.number(),
+                network,
+            })
+            .collect();
+
+        Ok(data)
+    }
+}
+
+pub struct CardanoTxo {
+    pub transaction_hash: [u8; 32],
+    pub index: u32,
+    pub value: u64,
+    pub assets: serde_json::Value,
+    pub assets_size_estimate: usize,
+    pub stake_credential: Option<[u8; 28]>,
+}
+
+impl CardanoTxo {
+    pub fn from_transactions(txs: &[MultiEraTx]) -> anyhow::Result<Vec<Self>> {
+        let data = txs
+            .iter()
+            .flat_map(|tx| {
+                tx.outputs().into_iter().zip(0..).map(|(tx_output, index)| {
+                    let address = tx_output.address()?;
+
+                    let stake_credential = match address {
+                        pallas_addresses::Address::Byron(_) => None,
+                        pallas_addresses::Address::Shelley(address) => address.try_into().ok(),
+                        pallas_addresses::Address::Stake(stake_address) => Some(stake_address),
+                    };
+
+                    // let parsed_assets = parse_policy_assets(&tx_output.non_ada_assets());
+                    // let assets_size_estimate = serde_size(&parsed_assets)?;
+                    // let assets = serde_json::to_value(&parsed_assets)?;
+                    let assets_size_estimate = 0;
+                    let assets = serde_json::Value::Null;
+
+                    Ok(Self {
+                        transaction_hash: *tx.hash(),
+                        index,
+                        value: tx_output.lovelace_amount(),
+                        assets,
+                        assets_size_estimate,
+                        stake_credential: stake_credential.map(|a| **a.payload().as_hash()),
+                    })
+                })
+            })
+            .collect::<anyhow::Result<Vec<_>>>()?;
+
+        Ok(data)
+    }
+}
+
+pub struct CardanoSpentTxo {
+    pub from_transaction_hash: [u8; 32],
+    pub index: u32,
+    pub to_transaction_hash: [u8; 32],
+}
+
+impl CardanoSpentTxo {
+    pub fn from_transactions(txs: &[MultiEraTx]) -> anyhow::Result<Vec<Self>> {
+        let data = txs
+            .iter()
+            .flat_map(|tx| {
+                tx.inputs().into_iter().map(|tx_input| Self {
+                    from_transaction_hash: **tx_input.output_ref().hash(),
+                    index: tx_input.output_ref().index() as u32,
+                    to_transaction_hash: *tx.hash(),
+                })
+            })
+            .collect();
+
+        Ok(data)
+    }
+}
+
+#[derive(Debug, Serialize)]
+struct Asset {
+    pub policy_id: String,
+    pub name: String,
+    pub amount: u64,
+}
+
+#[derive(Debug, Serialize)]
+struct PolicyAsset {
+    pub policy_hash: String,
+    pub assets: Vec<Asset>,
+}
+
+fn parse_policy_assets(assets: &[MultiEraPolicyAssets<'_>]) -> Vec<PolicyAsset> {
+    assets
+        .iter()
+        .map(|asset| PolicyAsset {
+            policy_hash: asset.policy().to_string(),
+            assets: parse_child_assets(&asset.assets()),
+        })
+        .collect()
+}
+
+fn parse_child_assets(assets: &[MultiEraAsset]) -> Vec<Asset> {
+    assets
+        .iter()
+        .filter_map(|asset| match asset {
+            MultiEraAsset::AlonzoCompatibleOutput(id, name, amount) => Some(Asset {
+                policy_id: id.to_string(),
+                name: name.to_string(),
+                amount: *amount,
+            }),
+            MultiEraAsset::AlonzoCompatibleMint(id, name, amount) => {
+                let amount = u64::try_from(*amount).ok()?;
+                Some(Asset {
+                    policy_id: id.to_string(),
+                    name: name.to_string(),
+                    amount,
+                })
+            }
+            _ => None,
+        })
+        .collect()
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Network {
+    Mainnet,
+    Preprod,
+}
+
+impl Network {
+    pub fn id(&self) -> u16 {
+        match self {
+            Network::Mainnet => 0,
+            Network::Preprod => 1,
+        }
+    }
+
+    pub fn genesis_values(&self) -> &'static GenesisValues {
+        match self {
+            Network::Mainnet => &MAINNET_GENESIS_VALUES,
+            Network::Preprod => &PREPROD_GENESIS_VALUES,
+        }
+    }
+}
+
+impl FromStr for Network {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "mainnet" => Ok(Self::Mainnet),
+            "preprod" => Ok(Self::Preprod),
+            _ => Err(anyhow::format_err!("Unknown network: '{}'", s)),
+        }
+    }
+}
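For reference, a minimal sketch of how these types are meant to be produced from a raw block body (illustrative only, not part of this patch; `raw_block` is a hypothetical CBOR-encoded block):

use catalyst_chaindata_types::{
    CardanoBlock, CardanoSpentTxo, CardanoTransaction, CardanoTxo, Network,
};
use pallas_traverse::MultiEraBlock;

fn convert(raw_block: &[u8]) -> anyhow::Result<()> {
    // Decode once, then derive all four row types from the same block.
    let block = MultiEraBlock::decode(raw_block)?;
    let txs = block.txs();

    let block_row = CardanoBlock::from_block(&block, Network::Preprod)?;
    let tx_rows = CardanoTransaction::many_from_block(&block, Network::Preprod)?;
    let txo_rows = CardanoTxo::from_transactions(&txs)?;
    let spent_rows = CardanoSpentTxo::from_transactions(&txs)?;

    println!(
        "block {}: {} txs, {} outputs, {} spent outputs",
        block_row.block_no,
        tx_rows.len(),
        txo_rows.len(),
        spent_rows.len()
    );
    Ok(())
}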
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/serde_size.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/serde_size.rs
new file mode 100644
index 00000000000..d8798f8818f
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-types/src/serde_size.rs
@@ -0,0 +1,21 @@
+use serde::Serialize;
+
+struct ByteCounter(usize);
+
+impl std::io::Write for ByteCounter {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.0 += buf.len();
+        Ok(buf.len())
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+}
+
+pub fn serde_size<T: Serialize>(data: &T) -> anyhow::Result<usize> {
+    let mut byte_counter = ByteCounter(0);
+    serde_json::to_writer(&mut byte_counter, data)?;
+
+    Ok(byte_counter.0)
+}
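Illustrative usage of `serde_size` (not part of this patch): it returns the exact length of the JSON encoding without allocating the intermediate string, which is what the `assets_size_estimate` field above is meant to be based on.

use catalyst_chaindata_types::serde_size::serde_size;

fn main() -> anyhow::Result<()> {
    // "[1,2,3]" serializes to 7 bytes; the counter never builds the string.
    let n = serde_size(&vec![1, 2, 3])?;
    assert_eq!(n, "[1,2,3]".len());
    Ok(())
}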
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/Cargo.toml
new file mode 100644
index 00000000000..42a58afbda2
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "catalyst-chaindata-writer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+catalyst-chaindata-types = { workspace = true }
+postgres-util = { workspace = true }
+
+anyhow = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt", "sync"] }
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/lib.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/lib.rs
new file mode 100644
index 00000000000..5cefd3e0803
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/lib.rs
@@ -0,0 +1,195 @@
+pub mod writers;
+
+use std::{future::Future, mem, sync::Arc};
+
+use anyhow::Result;
+use catalyst_chaindata_types::{CardanoBlock, CardanoSpentTxo, CardanoTransaction, CardanoTxo};
+use tokio::{
+    sync::{mpsc, OwnedSemaphorePermit, Semaphore},
+    task::{JoinError, JoinHandle},
+};
+
+pub struct WriteData {
+    pub block: CardanoBlock,
+    pub transactions: Vec<CardanoTransaction>,
+    pub transaction_outputs: Vec<CardanoTxo>,
+    pub spent_transaction_outputs: Vec<CardanoSpentTxo>,
+}
+
+impl WriteData {
+    pub fn byte_size(&self) -> usize {
+        mem::size_of_val(&self.block)
+            + self
+                .transactions
+                .iter()
+                .map(mem::size_of_val)
+                .sum::<usize>()
+            + self
+                .transaction_outputs
+                .iter()
+                .map(|txo| mem::size_of_val(txo) + txo.assets_size_estimate)
+                .sum::<usize>()
+            + self
+                .spent_transaction_outputs
+                .iter()
+                .map(mem::size_of_val)
+                .sum::<usize>()
+    }
+}
+
+pub trait Writer {
+    fn batch_write(
+        &mut self,
+        data: Vec<WriteData>,
+    ) -> impl Future<Output = Result<()>> + Send;
+}
+
+#[derive(Clone)]
+pub struct ChainDataWriterHandle {
+    write_semaphore: Arc<Semaphore>,
+    write_data_tx: mpsc::UnboundedSender<(OwnedSemaphorePermit, WriteData)>,
+}
+
+impl ChainDataWriterHandle {
+    pub async fn write(&self, d: WriteData) -> Result<()> {
+        let permit = self
+            .write_semaphore
+            .clone()
+            .acquire_many_owned(d.byte_size() as u32)
+            .await?;
+
+        self.write_data_tx.send((permit, d))?;
+
+        Ok(())
+    }
+}
+
+pub struct ChainDataWriter {
+    write_worker_task_handle: JoinHandle<()>,
+}
+
+impl ChainDataWriter {
+    pub async fn new<W>(
+        writer: W,
+        write_buffer_byte_size: usize,
+    ) -> Result<(Self, ChainDataWriterHandle)>
+    where
+        W: Writer + Send + 'static,
+    {
+        let (write_data_tx, write_data_rx) = mpsc::unbounded_channel();
+
+        let write_worker_task_handle = tokio::spawn(write_task::run(
+            writer,
+            write_buffer_byte_size,
+            write_data_rx,
+        ));
+
+        let this = Self {
+            write_worker_task_handle,
+        };
+
+        let handle = ChainDataWriterHandle {
+            // Permits are bytes: allow up to 2x the flush threshold in flight so
+            // producers can keep queuing data while a full buffer is being written.
+            write_semaphore: Arc::new(Semaphore::new(write_buffer_byte_size * 2)),
+            write_data_tx,
+        };
+
+        Ok((this, handle))
+    }
+}
+
+impl Future for ChainDataWriter {
+    type Output = Result<(), JoinError>;
+
+    fn poll(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Self::Output> {
+        let mut write_worker_task_handle = std::pin::pin!(&mut self.write_worker_task_handle);
+        write_worker_task_handle.as_mut().poll(cx)
+    }
+}
+
+mod write_task {
+    use std::time::Duration;
+
+    use tokio::sync::{mpsc, OwnedSemaphorePermit};
+
+    use crate::{WriteData, Writer};
+
+    pub async fn run<W>(
+        mut writer: W,
+        write_buffer_byte_size: usize,
+        mut write_data_rx: mpsc::UnboundedReceiver<(OwnedSemaphorePermit, WriteData)>,
+    ) where
+        W: Writer,
+    {
+        let mut merged_permits: Option<OwnedSemaphorePermit> = None;
+        let mut writer_data_buffer = Vec::new();
+
+        let mut total_byte_count = 0;
+
+        let mut flush = false;
+        let mut close = false;
+
+        let mut ticker = tokio::time::interval(Duration::from_secs(30));
+
+        let mut latest_block = 0;
+
+        loop {
+            tokio::select! {
+                // If we did not receive data for a while, flush the buffers.
+                _ = ticker.tick() => {
+                    flush = true;
+                }
+
+                res = write_data_rx.recv() => {
+                    // Reset the ticker since we received data.
+                    ticker.reset();
+
+                    match res {
+                        None => {
+                            flush = true;
+                            close = true;
+                        }
+                        Some((permit, data)) => {
+                            total_byte_count += data.byte_size();
+
+                            match merged_permits.as_mut() {
+                                Some(p) => p.merge(permit),
+                                None => merged_permits = Some(permit),
+                            }
+
+                            latest_block = latest_block.max(data.block.block_no);
+                            writer_data_buffer.push(data);
+                        }
+                    }
+                }
+            }
+
+            if (flush && total_byte_count > 0) || total_byte_count >= write_buffer_byte_size {
+                println!(
+                    "WRITING {} | SIZE {:.2} MB",
+                    latest_block,
+                    (total_byte_count as f64) / (1024.0 * 1024.0),
+                );
+
+                writer
+                    .batch_write(std::mem::take(&mut writer_data_buffer))
+                    .await
+                    .expect("Write");
+
+                merged_permits = None;
+                total_byte_count = 0;
+
+                ticker.reset();
+
+                flush = false;
+            }
+
+            if close {
+                break;
+            }
+        }
+    }
+}
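A minimal `Writer` implementation for reference (illustrative, not part of this patch), e.g. for benchmarking the read path without a database; `NoopWriter` and the buffer size are arbitrary example values:

use catalyst_chaindata_writer::{ChainDataWriter, WriteData, Writer};

struct NoopWriter;

impl Writer for NoopWriter {
    async fn batch_write(&mut self, data: Vec<WriteData>) -> anyhow::Result<()> {
        // Discard the batch; a real writer would persist it.
        println!("discarding a batch of {} blocks", data.len());
        Ok(())
    }
}

// Inside a tokio runtime:
// let (writer_task, handle) = ChainDataWriter::new(NoopWriter, 256 * 1024 * 1024).await?;
// handle.write(some_write_data).await?;
// drop(handle);        // closing all handles closes the channel...
// writer_task.await?;  // ...which triggers a final flush before the task exits.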
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/mod.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/mod.rs
new file mode 100644
index 00000000000..26e9103cd54
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/mod.rs
@@ -0,0 +1 @@
+pub mod postgres;
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/postgres.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/postgres.rs
new file mode 100644
index 00000000000..81e8ce3e518
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/catalyst-chaindata-writer/src/writers/postgres.rs
@@ -0,0 +1,182 @@
+use catalyst_chaindata_types::{CardanoBlock, CardanoSpentTxo, CardanoTransaction, CardanoTxo};
+use postgres_util::{
+    connection::Connection,
+    tokio_postgres::{self, binary_copy::BinaryCopyInWriter, types::Type},
+};
+
+pub struct PostgresWriter {
+    conn: Connection,
+}
+
+impl PostgresWriter {
+    pub async fn open(conn_string: &str) -> anyhow::Result<Self> {
+        let conn = Connection::open(conn_string).await?;
+
+        Ok(Self { conn })
+    }
+}
+
+impl crate::Writer for PostgresWriter {
+    async fn batch_write(&mut self, data: Vec<crate::WriteData>) -> anyhow::Result<()> {
+        let mut blocks = Vec::new();
+        let mut transactions = Vec::new();
+        let mut txos = Vec::new();
+        let mut spent_txos = Vec::new();
+
+        for d in data {
+            blocks.push(d.block);
+            transactions.extend(d.transactions);
+            txos.extend(d.transaction_outputs);
+            spent_txos.extend(d.spent_transaction_outputs);
+        }
+
+        let tx = self.conn.client_mut().transaction().await?;
+
+        tokio::try_join!(
+            copy_blocks(&tx, &blocks),
+            copy_transactions(&tx, &transactions),
+            copy_txos(&tx, &txos),
+            copy_spent_txos(&tx, &spent_txos)
+        )?;
+
+        tx.commit().await?;
+
+        Ok(())
+    }
+}
+
+async fn copy_blocks(
+    tx: &tokio_postgres::Transaction<'_>,
+    data: &[CardanoBlock],
+) -> anyhow::Result<()> {
+    let sink = tx.copy_in("COPY cardano_blocks (block_no, slot_no, epoch_no, network_id, block_time, block_hash, previous_hash) FROM STDIN BINARY").await?;
+
+    let writer = BinaryCopyInWriter::new(
+        sink,
+        &[
+            Type::INT8,
+            Type::INT8,
+            Type::INT8,
+            Type::INT2,
+            Type::TIMESTAMPTZ,
+            Type::BYTEA,
+            Type::BYTEA,
+        ],
+    );
+    tokio::pin!(writer);
+
+    for cb in data {
+        writer
+            .as_mut()
+            .write(&[
+                &(cb.block_no as i64),
+                &(cb.slot_no as i64),
+                &(cb.epoch_no as i64),
+                &(cb.network.id() as i16),
+                &cb.block_time,
+                &cb.block_hash.as_slice(),
+                &cb.previous_hash.as_ref().map(|h| h.as_slice()),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    Ok(())
+}
+
+async fn copy_transactions(
+    tx: &tokio_postgres::Transaction<'_>,
+    data: &[CardanoTransaction],
+) -> anyhow::Result<()> {
+    let sink = tx
+        .copy_in("COPY cardano_transactions (block_no, network_id, hash) FROM STDIN BINARY")
+        .await?;
+
+    let writer = BinaryCopyInWriter::new(sink, &[Type::INT8, Type::INT2, Type::BYTEA]);
+    tokio::pin!(writer);
+
+    for tx_data in data {
+        writer
+            .as_mut()
+            .write(&[
+                &(tx_data.block_no as i64),
+                &(tx_data.network.id() as i16),
+                &tx_data.hash.as_slice(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    Ok(())
+}
+
+async fn copy_txos(
+    tx: &tokio_postgres::Transaction<'_>,
+    data: &[CardanoTxo],
+) -> anyhow::Result<()> {
+    let sink = tx
+        .copy_in("COPY cardano_txo (transaction_hash, index, value, assets, stake_credential) FROM STDIN BINARY")
+        .await?;
+
+    let writer = BinaryCopyInWriter::new(
+        sink,
+        &[
+            Type::BYTEA,
+            Type::INT4,
+            Type::INT8,
+            Type::JSONB,
+            Type::BYTEA,
+        ],
+    );
+    tokio::pin!(writer);
+
+    for txo_data in data {
+        writer
+            .as_mut()
+            .write(&[
+                &txo_data.transaction_hash.as_slice(),
+                &(txo_data.index as i32),
+                &(txo_data.value as i64),
+                &txo_data.assets,
+                &txo_data.stake_credential.as_ref().map(|a| a.as_slice()),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    Ok(())
+}
+
+async fn copy_spent_txos(
+    tx: &tokio_postgres::Transaction<'_>,
+    data: &[CardanoSpentTxo],
+) -> anyhow::Result<()> {
+    let sink = tx
+        .copy_in("COPY cardano_spent_txo (from_transaction_hash, index, to_transaction_hash) FROM STDIN BINARY")
+        .await?;
+
+    let writer = BinaryCopyInWriter::new(sink, &[Type::BYTEA, Type::INT4, Type::BYTEA]);
+    tokio::pin!(writer);
+
+    for spent_txo_data in data {
+        writer
+            .as_mut()
+            .write(&[
+                &spent_txo_data.from_transaction_hash.as_slice(),
+                &(spent_txo_data.index as i32),
+                &spent_txo_data.to_transaction_hash.as_slice(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    Ok(())
+}
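Illustrative wiring of the Postgres writer into the buffered writer above (not part of this patch; the connection string and buffer size are example values):

use catalyst_chaindata_writer::{writers::postgres::PostgresWriter, ChainDataWriter};

async fn open_writer() -> anyhow::Result<()> {
    let pg = PostgresWriter::open("host=localhost user=postgres dbname=catalyst").await?;

    // Flush roughly every 512 MiB of buffered chain data.
    let (writer_task, handle) = ChainDataWriter::new(pg, 512 * 1024 * 1024).await?;

    // ... hand `handle` to the sync loop, then:
    drop(handle);
    writer_task.await?;

    Ok(())
}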
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/Cargo.toml b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/Cargo.toml
new file mode 100644
index 00000000000..cae9d84000a
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "postgres-util"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+tokio = { workspace = true, default-features = false, features = ["macros", "rt"] }
+tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1"] }
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_indexes.sql b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_indexes.sql
new file mode 100644
index 00000000000..a5fd8d17903
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_indexes.sql
@@ -0,0 +1,16 @@
+-- cardano_blocks indexes
+CREATE INDEX IF NOT EXISTS cardano_blocks_block_no ON cardano_blocks USING BTREE (block_no) INCLUDE (slot_no);
+CREATE INDEX IF NOT EXISTS cardano_blocks_slot_no ON cardano_blocks USING BTREE(slot_no);
+CREATE INDEX IF NOT EXISTS cardano_blocks_block_time ON cardano_blocks USING BTREE(block_time);
+
+-- cardano_transactions indexes
+CREATE INDEX IF NOT EXISTS cardano_transactions_hash ON cardano_transactions USING BTREE(hash);
+CREATE INDEX IF NOT EXISTS cardano_transactions_block_no ON cardano_transactions USING BTREE(block_no);
+
+-- cardano_txo indexes
+CREATE INDEX IF NOT EXISTS cardano_txo_output_ref ON cardano_txo USING BTREE(transaction_hash, index);
+CREATE INDEX IF NOT EXISTS cardano_txo_stake_credential ON cardano_txo USING BTREE(stake_credential);
+
+-- cardano_spent_txo indexes
+CREATE INDEX IF NOT EXISTS cardano_spent_txo_output_ref ON cardano_spent_txo USING BTREE(from_transaction_hash, index);
+CREATE INDEX IF NOT EXISTS cardano_spent_txo_to_transaction_hash ON cardano_spent_txo USING BTREE(to_transaction_hash);
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_tables.sql b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_tables.sql
new file mode 100644
index 00000000000..a4870ec28fa
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/create_tables.sql
@@ -0,0 +1,39 @@
+CREATE TABLE IF NOT EXISTS cardano_networks (
+    id SMALLINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    name TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS cardano_networks_name ON cardano_networks USING BTREE(name);
+
+INSERT INTO cardano_networks (name) VALUES ('mainnet') ON CONFLICT (name) DO NOTHING;
+INSERT INTO cardano_networks (name) VALUES ('preprod') ON CONFLICT (name) DO NOTHING;
+
+CREATE TABLE IF NOT EXISTS cardano_blocks (
+    block_no BIGINT NOT NULL,
+    slot_no BIGINT NOT NULL,
+    epoch_no BIGINT NOT NULL,
+    network_id SMALLINT NOT NULL,
+    block_time TIMESTAMP WITH TIME ZONE NOT NULL,
+    block_hash BYTEA NOT NULL,
+    previous_hash BYTEA
+);
+
+CREATE TABLE IF NOT EXISTS cardano_transactions (
+    hash BYTEA NOT NULL,
+    block_no BIGINT NOT NULL,
+    network_id SMALLINT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS cardano_txo (
+    transaction_hash BYTEA NOT NULL,
+    index INTEGER NOT NULL,
+    value BIGINT NOT NULL,
+    assets JSONB,
+    stake_credential BYTEA
+);
+
+CREATE TABLE IF NOT EXISTS cardano_spent_txo (
+    from_transaction_hash BYTEA NOT NULL,
+    index INTEGER NOT NULL,
+    to_transaction_hash BYTEA NOT NULL
+);
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/find_missing_data.sql b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/find_missing_data.sql
new file mode 100644
index 00000000000..a96f2591674
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/find_missing_data.sql
@@ -0,0 +1,13 @@
+WITH block_no_ranges AS (
+    SELECT
+        block_no,
+        slot_no,
+        LEAD (block_no) OVER (ORDER BY block_no) AS next_block_no
+    FROM cardano_blocks
+)
+SELECT
+    r.slot_no,
+    b.slot_no AS next_slot_no
+FROM block_no_ranges r
+JOIN cardano_blocks b ON b.block_no = r.next_block_no
+WHERE (r.next_block_no - r.block_no) > 1
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/latest_slot.sql b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/latest_slot.sql
new file mode 100644
index 00000000000..7bc4027ebf8
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/latest_slot.sql
@@ -0,0 +1 @@
+SELECT slot_no FROM cardano_blocks ORDER BY block_no DESC LIMIT 1;
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/total_utxo_amount.sql b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/total_utxo_amount.sql
new file mode 100644
index 00000000000..d8d10edfae4
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/sql/total_utxo_amount.sql
@@ -0,0 +1,19 @@
+SELECT
+    SUM(cardano_txo.value)
+FROM cardano_txo
+INNER JOIN cardano_transactions
+ON cardano_transactions.hash = cardano_txo.transaction_hash
+INNER JOIN cardano_blocks
+ON cardano_blocks.block_no = cardano_transactions.block_no
+LEFT JOIN cardano_spent_txo
+ON cardano_txo.transaction_hash = cardano_spent_txo.from_transaction_hash
+AND cardano_txo.index = cardano_spent_txo.index
+LEFT JOIN cardano_transactions spent_txo_txs
+ON spent_txo_txs.hash = cardano_spent_txo.to_transaction_hash
+LEFT JOIN cardano_blocks spent_txo_txs_blocks
+ON spent_txo_txs_blocks.block_no = spent_txo_txs.block_no
+WHERE
+    cardano_txo.stake_credential = '\x59938af7640bd602f5b5ed6da219f63d48a39b6f55fda093ea0df062' AND
+    (cardano_spent_txo.to_transaction_hash IS NULL OR spent_txo_txs_blocks.slot_no > 56364174) AND
+    cardano_transactions.network_id = 1 AND
+    cardano_blocks.slot_no <= 56364174
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/connection.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/connection.rs
new file mode 100644
index 00000000000..c0e035e2d20
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/connection.rs
@@ -0,0 +1,33 @@
+use std::future::Future;
+
+pub struct Connection {
+    client: tokio_postgres::Client,
+    conn_task_handle: tokio::task::JoinHandle<()>,
+}
+
+impl Connection {
+    pub async fn open(conn_string: &str) -> anyhow::Result<Self> {
+        let (client, conn) = tokio_postgres::connect(conn_string, tokio_postgres::NoTls).await?;
+
+        let conn_task_handle = tokio::spawn(async move {
+            conn.await.expect("Postgres connection task failed");
+        });
+
+        Ok(Self {
+            client,
+            conn_task_handle,
+        })
+    }
+
+    pub fn client(&self) -> &tokio_postgres::Client {
+        &self.client
+    }
+
+    pub fn client_mut(&mut self) -> &mut tokio_postgres::Client {
+        &mut self.client
+    }
+
+    pub fn close(self) -> impl Future<Output = Result<(), tokio::task::JoinError>> {
+        self.conn_task_handle
+    }
+}
diff --git a/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/lib.rs b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/lib.rs
new file mode 100644
index 00000000000..91b7dd3ec41
--- /dev/null
+++ b/catalyst-gateway/crates/catalyst-chain-syncer/crates/postgres-util/src/lib.rs
@@ -0,0 +1,28 @@
+use anyhow::Result;
+use connection::Connection;
+
+pub use tokio_postgres;
+
+pub mod connection;
+
+pub async fn create_tables_if_not_present(conn_string: &str) -> Result<()> {
+    let conn = Connection::open(conn_string).await?;
+
+    conn.client()
+        .batch_execute(include_str!("../sql/create_tables.sql"))
+        .await?;
+    conn.close().await?;
+
+    Ok(())
+}
+
+pub async fn create_indexes(conn_string: &str) -> Result<()> {
+    let conn = Connection::open(conn_string).await?;
+
+    conn.client()
+        .batch_execute(include_str!("../sql/create_indexes.sql"))
+        .await?;
+    conn.close().await?;
+
+    Ok(())
+}
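Taken together, the intended call order appears to be: create tables, bulk-load via COPY, then build indexes last (a common bulk-load optimization, suggested by create_indexes being split out from create_tables_if_not_present). A sketch under those assumptions, with `conn_string` as an example parameter; the resume query inlines latest_slot.sql:

async fn sync(conn_string: &str) -> anyhow::Result<()> {
    postgres_util::create_tables_if_not_present(conn_string).await?;

    // Resume from the last stored block, if any (same query as latest_slot.sql).
    let conn = postgres_util::connection::Connection::open(conn_string).await?;
    let row = conn
        .client()
        .query_opt(
            "SELECT slot_no FROM cardano_blocks ORDER BY block_no DESC LIMIT 1",
            &[],
        )
        .await?;
    let start_slot = row.map(|r| r.get::<_, i64>(0) as u64).unwrap_or(0);
    println!("resuming from slot {start_slot}");
    conn.close().await?;

    // ... read the immutable DB from `start_slot` and feed WriteData through the writer ...

    // Indexes are created last so the bulk COPYs don't pay for index maintenance.
    postgres_util::create_indexes(conn_string).await?;

    Ok(())
}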