From 5b3d383d7a65b0fbe2a771fecf4313f5083be9ae Mon Sep 17 00:00:00 2001
From: pompon0
Date: Wed, 24 Jan 2024 09:13:13 +0100
Subject: [PATCH] made max_payload_size configurable (#60)

The max_payload_size is checked in the bft replica and also implies
buffering limits in the network actor (both the gossip and consensus
networks). Hardcoding it turned out to be too optimistic a strategy,
so I've extracted it into the config, so that it can be set at runtime.
Given the lesson learned, I'll also be deprecating the
`ProtoFmt::max_size()` function soon.

Additional changes:
* added some logging useful for debugging the zksync-era tests, and
  improved existing logging to always show the cause of an error,
  not just the leaf message.
* removed `tracing::instrument` annotations which were logging
  non-critical (not-a-bug) errors at level ERROR
* made the network crate depend on the storage crate, to avoid the
  workaround previously used for implementing the get_block rpc
* cleaned up the rpc implementations to be more consistent
* deduplicated the testonly helpers and moved them into the crates
  they belong to
---
 node/Cargo.lock | 2 +
 node/actors/bft/src/config.rs | 7 +-
 node/actors/bft/src/leader/state_machine.rs | 8 +
 node/actors/bft/src/replica/block.rs | 9 +-
 node/actors/bft/src/replica/leader_prepare.rs | 3 +-
 node/actors/bft/src/replica/tests.rs | 7 +-
 node/actors/bft/src/testonly/fuzz.rs | 5 +-
 node/actors/bft/src/testonly/make.rs | 34 +-
 node/actors/bft/src/testonly/node.rs | 38 +-
 node/actors/bft/src/testonly/run.rs | 206 +++---
 node/actors/bft/src/testonly/ut_harness.rs | 42 +-
 node/actors/executor/Cargo.toml | 1 +
 node/actors/executor/src/lib.rs | 34 +-
 node/actors/executor/src/testonly.rs | 17 +-
 node/actors/executor/src/tests.rs | 126 ++--
 node/actors/network/Cargo.toml | 1 +
 .../network/src/consensus/handshake/mod.rs | 4 +-
 .../network/src/consensus/handshake/tests.rs | 6 +-
 node/actors/network/src/consensus/runner.rs | 62 +-
 node/actors/network/src/consensus/tests.rs | 133 ++--
 node/actors/network/src/event.rs | 11 -
 node/actors/network/src/frame.rs | 19 +-
 .../network/src/gossip/handshake/mod.rs | 4 +-
 .../network/src/gossip/handshake/tests.rs | 7 +-
 node/actors/network/src/gossip/runner.rs | 336 ++++------
 node/actors/network/src/gossip/state.rs | 86 ++-
 node/actors/network/src/gossip/tests.rs | 577 +++++++-----------
 node/actors/network/src/io.rs | 52 +-
 node/actors/network/src/mux/mod.rs | 5 +-
 .../actors/network/src/mux/reusable_stream.rs | 2 +-
 node/actors/network/src/mux/tests/mod.rs | 10 +-
 node/actors/network/src/preface.rs | 4 +-
 node/actors/network/src/proto/gossip.proto | 40 +-
 node/actors/network/src/rpc/consensus.rs | 8 -
 node/actors/network/src/rpc/get_block.rs | 62 ++
 node/actors/network/src/rpc/mod.rs | 54 +-
 node/actors/network/src/rpc/ping.rs | 13 +-
 .../network/src/rpc/push_block_store_state.rs | 45 ++
 ...dator_addrs.rs => push_validator_addrs.rs} | 40 +-
 node/actors/network/src/rpc/sync_blocks.rs | 179 ------
 node/actors/network/src/rpc/testonly.rs | 43 +-
 node/actors/network/src/rpc/tests.rs | 24 +-
 node/actors/network/src/state.rs | 30 +-
 node/actors/network/src/testonly.rs | 224 ++++---
 node/actors/network/src/tests.rs | 26 +-
 node/actors/sync_blocks/src/lib.rs | 28 +-
 .../sync_blocks/src/peers/tests/basics.rs | 89 ++-
 .../sync_blocks/src/peers/tests/fakes.rs | 40 +-
 .../actors/sync_blocks/src/peers/tests/mod.rs | 34 +-
 .../src/peers/tests/multiple_peers.rs | 32 +-
 .../sync_blocks/src/peers/tests/snapshots.rs | 42 +-
 .../sync_blocks/src/tests/end_to_end.rs | 146 ++---
 node/actors/sync_blocks/src/tests/mod.rs | 234 +------
 node/libs/concurrency/src/ctx/channel.rs | 5 +
 node/libs/concurrency/src/oneshot.rs | 12 +
 node/libs/concurrency/src/signal.rs | 8 +-
 node/libs/concurrency/src/sync/mod.rs | 6 +-
 node/libs/concurrency/src/testonly.rs | 24 +-
 node/libs/crypto/src/bn254/mod.rs | 5 +
 node/libs/crypto/src/ed25519/mod.rs | 16 +-
 node/libs/protobuf/src/testonly.rs | 2 +-
 node/libs/roles/src/node/keys.rs | 12 +-
 node/libs/roles/src/node/tests.rs | 4 +-
 .../roles/src/validator/keys/secret_key.rs | 7 +-
 node/libs/roles/src/validator/mod.rs | 3 +
 node/libs/roles/src/validator/testonly.rs | 124 +++-
 node/libs/roles/src/validator/tests.rs | 34 +-
 node/libs/storage/src/block_store/mod.rs | 7 +-
 node/libs/storage/src/testonly/mod.rs | 29 +-
 node/libs/storage/src/tests.rs | 46 +-
 node/tools/src/bin/keys.rs | 16 +-
 node/tools/src/bin/localnet_config.rs | 25 +-
 node/tools/src/config.rs | 17 +-
 node/tools/src/proto/mod.proto | 3 +
 node/tools/src/tests.rs | 14 +-
 75 files changed, 1691 insertions(+), 2019 deletions(-)
 create mode 100644 node/actors/network/src/rpc/get_block.rs
 create mode 100644 node/actors/network/src/rpc/push_block_store_state.rs
 rename node/actors/network/src/rpc/{sync_validator_addrs.rs => push_validator_addrs.rs} (52%)
 delete mode 100644 node/actors/network/src/rpc/sync_blocks.rs

diff --git a/node/Cargo.lock b/node/Cargo.lock
index a869c53e..566f9ae1 100644
--- a/node/Cargo.lock
+++ b/node/Cargo.lock
@@ -2695,6 +2695,7 @@ dependencies = [
  "zksync_consensus_storage",
  "zksync_consensus_sync_blocks",
  "zksync_consensus_utils",
+ "zksync_protobuf",
 ]

 [[package]]
@@ -2718,6 +2719,7 @@ dependencies = [
  "zksync_concurrency",
  "zksync_consensus_crypto",
  "zksync_consensus_roles",
+ "zksync_consensus_storage",
  "zksync_consensus_utils",
  "zksync_protobuf",
  "zksync_protobuf_build",
diff --git a/node/actors/bft/src/config.rs b/node/actors/bft/src/config.rs
index beeb32ea..c1a94b74 100644
--- a/node/actors/bft/src/config.rs
+++ b/node/actors/bft/src/config.rs
@@ -12,6 +12,9 @@ pub struct Config {
     pub secret_key: validator::SecretKey,
     /// A vector of public keys for all the validators in the network.
     pub validator_set: validator::ValidatorSet,
+    /// The maximum size of the payload of a block, in bytes. We will
+    /// reject blocks with payloads larger than this.
+    pub max_payload_size: usize,
     /// Block store.
     pub block_store: Arc<storage::BlockStore>,
     /// Replica store.
@@ -21,10 +24,6 @@
 }

 impl Config {
-    /// The maximum size of the payload of a block, in bytes. We will
-    /// reject blocks with payloads larger than this.
-    pub(crate) const PAYLOAD_MAX_SIZE: usize = 500 * zksync_protobuf::kB;
-
     /// Computes the validator for the given view.
     #[instrument(level = "trace", ret)]
     pub fn view_leader(&self, view_number: validator::ViewNumber) -> validator::PublicKey {
diff --git a/node/actors/bft/src/leader/state_machine.rs b/node/actors/bft/src/leader/state_machine.rs
index c59003e3..d37a87e5 100644
--- a/node/actors/bft/src/leader/state_machine.rs
+++ b/node/actors/bft/src/leader/state_machine.rs
@@ -173,6 +173,14 @@ impl StateMachine {
             .payload_manager
             .propose(ctx, highest_qc.header().number.next())
             .await?;
+        if payload.0.len() > cfg.max_payload_size {
+            return Err(anyhow::format_err!(
+                "proposed payload too large: got {}B, max {}B",
+                payload.0.len(),
+                cfg.max_payload_size
+            )
+            .into());
+        }
         metrics::METRICS
             .leader_proposal_payload_size
             .observe(payload.0.len());
diff --git a/node/actors/bft/src/replica/block.rs b/node/actors/bft/src/replica/block.rs
index fd91780b..75d2404c 100644
--- a/node/actors/bft/src/replica/block.rs
+++ b/node/actors/bft/src/replica/block.rs
@@ -1,5 +1,5 @@
 use super::StateMachine;
-use tracing::{info, instrument};
+use tracing::instrument;
 use zksync_concurrency::ctx;
 use zksync_consensus_roles::validator;

@@ -30,9 +30,10 @@ impl StateMachine {
             justification: commit_qc.clone(),
         };

-        info!(
-            "Finalized a block!\nFinal block: {:#?}",
-            block.header().hash()
+        tracing::info!(
+            "Finalized block {}: {:#?}",
+            block.header().number,
+            block.header().hash(),
         );
         self.config
             .block_store
diff --git a/node/actors/bft/src/replica/leader_prepare.rs b/node/actors/bft/src/replica/leader_prepare.rs
index 0c143e01..50e9dcf4 100644
--- a/node/actors/bft/src/replica/leader_prepare.rs
+++ b/node/actors/bft/src/replica/leader_prepare.rs
@@ -1,5 +1,4 @@
 use super::StateMachine;
-use crate::Config;
 use std::collections::HashMap;
 use tracing::instrument;
 use zksync_concurrency::{ctx, error::Wrap};
@@ -229,7 +228,7 @@ impl StateMachine {
             // The leader proposed a new block.
             Some(payload) => {
                 // Check that the payload doesn't exceed the maximum size.
-                if payload.0.len() > Config::PAYLOAD_MAX_SIZE {
+                if payload.0.len() > self.config.max_payload_size {
                     return Err(Error::ProposalOversizedPayload {
                         payload_size: payload.0.len(),
                         header: message.proposal,
diff --git a/node/actors/bft/src/replica/tests.rs b/node/actors/bft/src/replica/tests.rs
index 107198f1..0cd2f731 100644
--- a/node/actors/bft/src/replica/tests.rs
+++ b/node/actors/bft/src/replica/tests.rs
@@ -1,5 +1,8 @@
 use super::{leader_commit, leader_prepare};
-use crate::{testonly, testonly::ut_harness::UTHarness, Config};
+use crate::{
+    testonly,
+    testonly::ut_harness::{UTHarness, MAX_PAYLOAD_SIZE},
+};
 use assert_matches::assert_matches;
 use rand::Rng;
 use zksync_concurrency::{ctx, scope};
@@ -271,7 +274,7 @@ async fn leader_prepare_proposal_oversized_payload() {
         let (mut util, runner) = UTHarness::new(ctx, 1).await;
         s.spawn_bg(runner.run(ctx));

-        let payload_oversize = Config::PAYLOAD_MAX_SIZE + 1;
+        let payload_oversize = MAX_PAYLOAD_SIZE + 1;
         let payload_vec = vec![0; payload_oversize];
         let mut leader_prepare = util.new_leader_prepare(ctx).await.msg;
         leader_prepare.proposal_payload = Some(Payload(payload_vec));
diff --git a/node/actors/bft/src/testonly/fuzz.rs b/node/actors/bft/src/testonly/fuzz.rs
index bfe90b56..2a48b72e 100644
--- a/node/actors/bft/src/testonly/fuzz.rs
+++ b/node/actors/bft/src/testonly/fuzz.rs
@@ -1,3 +1,4 @@
+use crate::testonly::node::MAX_PAYLOAD_SIZE;
 use rand::{seq::SliceRandom, Rng};
 use zksync_consensus_roles::validator;

@@ -155,10 +156,10 @@ impl Fuzz for validator::Signers {

 impl Fuzz for validator::Payload {
     fn mutate(&mut self, rng: &mut impl Rng) {
         // Push bytes into the payload until it exceeds the limit.
-        let num_bytes = crate::Config::PAYLOAD_MAX_SIZE + 1 - self.0.len();
+        let num_bytes = MAX_PAYLOAD_SIZE - self.0.len() + 1;
         let bytes: Vec<u8> = (0..num_bytes).map(|_| rng.gen()).collect();
         self.0.extend_from_slice(&bytes);
-        assert!(self.0.len() > crate::Config::PAYLOAD_MAX_SIZE);
+        assert!(self.0.len() > MAX_PAYLOAD_SIZE);
     }
 }

diff --git a/node/actors/bft/src/testonly/make.rs b/node/actors/bft/src/testonly/make.rs
index 61506568..d2b49113 100644
--- a/node/actors/bft/src/testonly/make.rs
+++ b/node/actors/bft/src/testonly/make.rs
@@ -1,12 +1,12 @@
 //! This module contains utilities that are only meant for testing purposes.
-use crate::{Config, PayloadManager};
+use crate::PayloadManager;
 use rand::Rng as _;
 use zksync_concurrency::ctx;
 use zksync_consensus_roles::validator;

-/// Produces random payload.
+/// Produces random payload of a given size.
 #[derive(Debug)]
-pub struct RandomPayload;
+pub struct RandomPayload(pub usize);

 #[async_trait::async_trait]
 impl PayloadManager for RandomPayload {
@@ -15,7 +15,7 @@ impl PayloadManager for RandomPayload {
         ctx: &ctx::Ctx,
         _number: validator::BlockNumber,
     ) -> ctx::Result<validator::Payload> {
-        let mut payload = validator::Payload(vec![0; Config::PAYLOAD_MAX_SIZE]);
+        let mut payload = validator::Payload(vec![0; self.0]);
         ctx.rng().fill(&mut payload.0[..]);
         Ok(payload)
     }
@@ -77,29 +77,3 @@ impl PayloadManager for RejectPayload {
         Err(anyhow::anyhow!("invalid payload").into())
     }
 }
-
-/// Creates a genesis block with the given payload
-/// and a validator set for the chain.
-pub fn make_genesis(
-    keys: &[validator::SecretKey],
-    payload: validator::Payload,
-    block_number: validator::BlockNumber,
-) -> (validator::FinalBlock, validator::ValidatorSet) {
-    let header = validator::BlockHeader::genesis(payload.hash(), block_number);
-    let validator_set = validator::ValidatorSet::new(keys.iter().map(|k| k.public())).unwrap();
-    let signed_messages: Vec<_> = keys
-        .iter()
-        .map(|sk| {
-            sk.sign_msg(validator::ReplicaCommit {
-                protocol_version: validator::ProtocolVersion::EARLIEST,
-                view: validator::ViewNumber(0),
-                proposal: header,
-            })
-        })
-        .collect();
-    let final_block = validator::FinalBlock {
-        payload,
-        justification: validator::CommitQC::from(&signed_messages, &validator_set).unwrap(),
-    };
-    (final_block, validator_set)
-}
diff --git a/node/actors/bft/src/testonly/node.rs b/node/actors/bft/src/testonly/node.rs
index 20c08b5f..6b221818 100644
--- a/node/actors/bft/src/testonly/node.rs
+++ b/node/actors/bft/src/testonly/node.rs
@@ -1,12 +1,16 @@
 use super::Fuzz;
 use crate::{io, testonly, PayloadManager};
+use anyhow::Context as _;
 use rand::Rng;
 use std::sync::Arc;
 use zksync_concurrency::{ctx, scope};
 use zksync_consensus_network as network;
 use zksync_consensus_network::io::ConsensusInputMessage;
 use zksync_consensus_storage as storage;
-use zksync_consensus_utils::pipe::DispatcherPipe;
+use zksync_consensus_storage::testonly::in_memory;
+use zksync_consensus_utils::pipe;
+
+pub(crate) const MAX_PAYLOAD_SIZE: usize = 1000;

 /// Enum representing the behavior of the node.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -29,32 +33,50 @@ impl Behavior {
     pub(crate) fn payload_manager(&self) -> Box<dyn PayloadManager> {
         match self {
             Self::HonestNotProposing => Box::new(testonly::PendingPayload),
-            _ => Box::new(testonly::RandomPayload),
+            _ => Box::new(testonly::RandomPayload(MAX_PAYLOAD_SIZE)),
         }
     }
 }

 /// Struct representing a node.
 pub(super) struct Node {
-    pub(crate) net: network::testonly::Instance,
+    pub(crate) net: network::Config,
     pub(crate) behavior: Behavior,
     pub(crate) block_store: Arc<storage::BlockStore>,
 }

 impl Node {
     /// Runs a mock executor.
- pub(crate) async fn run_executor( + pub(crate) async fn run( &self, ctx: &ctx::Ctx, - consensus_pipe: DispatcherPipe, - network_pipe: DispatcherPipe, + network_pipe: &mut pipe::DispatcherPipe< + network::io::InputMessage, + network::io::OutputMessage, + >, ) -> anyhow::Result<()> { let rng = &mut ctx.rng(); - let mut net_recv = network_pipe.recv; - let net_send = network_pipe.send; + let net_recv = &mut network_pipe.recv; + let net_send = &mut network_pipe.send; + let (consensus_actor_pipe, consensus_pipe) = pipe::new(); let mut con_recv = consensus_pipe.recv; let con_send = consensus_pipe.send; scope::run!(ctx, |ctx, s| async { + s.spawn(async { + let validator_key = self.net.consensus.as_ref().unwrap().key.clone(); + let validator_set = self.net.validators.clone(); + crate::Config { + secret_key: validator_key.clone(), + validator_set, + block_store: self.block_store.clone(), + replica_store: Box::new(in_memory::ReplicaStore::default()), + payload_manager: self.behavior.payload_manager(), + max_payload_size: MAX_PAYLOAD_SIZE, + } + .run(ctx, consensus_actor_pipe) + .await + .context("consensus.run()") + }); s.spawn(async { while let Ok(network_message) = net_recv.recv(ctx).await { match network_message { diff --git a/node/actors/bft/src/testonly/run.rs b/node/actors/bft/src/testonly/run.rs index 5072a71d..e8f479a6 100644 --- a/node/actors/bft/src/testonly/run.rs +++ b/node/actors/bft/src/testonly/run.rs @@ -1,12 +1,10 @@ use super::{Behavior, Node}; -use crate::{testonly, Config}; -use anyhow::Context; use std::collections::HashMap; use tracing::Instrument as _; -use zksync_concurrency::{ctx, oneshot, scope, signal, sync}; +use zksync_concurrency::{ctx, oneshot, scope, sync}; use zksync_consensus_network as network; use zksync_consensus_roles::validator; -use zksync_consensus_storage::{testonly::in_memory, BlockStore}; +use zksync_consensus_storage::testonly::new_store; use zksync_consensus_utils::pipe; #[derive(Clone, Copy)] @@ -27,155 +25,121 @@ impl Test { /// Run a test with the given parameters. pub(crate) async fn run(&self, ctx: &ctx::Ctx) -> anyhow::Result<()> { let rng = &mut ctx.rng(); - let nets: Vec<_> = network::testonly::Instance::new(rng, self.nodes.len(), 1); - let keys: Vec<_> = nets - .iter() - .map(|node| node.consensus_config().key.clone()) - .collect(); - let (genesis_block, _) = - testonly::make_genesis(&keys, validator::Payload(vec![]), validator::BlockNumber(0)); + let setup = validator::testonly::GenesisSetup::new(rng, self.nodes.len()); + let nets: Vec<_> = network::testonly::new_configs(rng, &setup, 1); let mut nodes = vec![]; - let mut store_runners = vec![]; - for (i, net) in nets.into_iter().enumerate() { - let block_store = Box::new(in_memory::BlockStore::new(genesis_block.clone())); - let (block_store, runner) = BlockStore::new(ctx, block_store).await?; - store_runners.push(runner); - nodes.push(Node { - net, - behavior: self.nodes[i], - block_store, - }); - } - - // Get only the honest replicas. - let honest: Vec<_> = nodes - .iter() - .filter(|node| node.behavior == Behavior::Honest) - .collect(); - assert!(!honest.is_empty()); - - // Run the nodes until all honest nodes store enough finalized blocks. 
+ let mut honest = vec![]; scope::run!(ctx, |ctx, s| async { - for runner in store_runners { + for (i, net) in nets.into_iter().enumerate() { + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; s.spawn_bg(runner.run(ctx)); + if self.nodes[i] == Behavior::Honest { + honest.push(store.clone()); + } + nodes.push(Node { + net, + behavior: self.nodes[i], + block_store: store, + }); } + assert!(!honest.is_empty()); s.spawn_bg(run_nodes(ctx, self.network, &nodes)); + + // Run the nodes until all honest nodes store enough finalized blocks. let want_block = validator::BlockNumber(self.blocks_to_finalize as u64); - for n in &honest { - sync::wait_for(ctx, &mut n.block_store.subscribe(), |state| { + for store in &honest { + sync::wait_for(ctx, &mut store.subscribe(), |state| { state.next() > want_block }) .await?; } - Ok(()) - }) - .await?; - // Check that the stored blocks are consistent. - for i in 0..self.blocks_to_finalize as u64 + 1 { - let i = validator::BlockNumber(i); - let want = honest[0].block_store.block(ctx, i).await?; - for n in &honest[1..] { - assert_eq!(want, n.block_store.block(ctx, i).await?); + // Check that the stored blocks are consistent. + for i in 0..self.blocks_to_finalize as u64 + 1 { + let i = validator::BlockNumber(i); + let want = honest[0].block(ctx, i).await?; + for store in &honest[1..] { + assert_eq!(want, store.block(ctx, i).await?); + } } - } - Ok(()) + Ok(()) + }) + .await } } /// Run a set of nodes. -async fn run_nodes(ctx: &ctx::Ctx, network: Network, nodes: &[Node]) -> anyhow::Result<()> { - let network_ready = signal::Once::new(); - let mut network_pipes = HashMap::new(); - let mut network_send = HashMap::new(); - let mut network_recv = HashMap::new(); +async fn run_nodes(ctx: &ctx::Ctx, network: Network, specs: &[Node]) -> anyhow::Result<()> { scope::run!(ctx, |ctx, s| async { - for (i, node) in nodes.iter().enumerate() { - let consensus_config = node.net.consensus_config(); - let validator_key = consensus_config.key.clone(); - let validator_set = node.net.to_config().validators; - - let (consensus_actor_pipe, consensus_pipe) = pipe::new(); - let (network_actor_pipe, network_pipe) = pipe::new(); - network_pipes.insert(validator_key.public(), network_actor_pipe); - s.spawn( - async { - scope::run!(ctx, |ctx, s| async { - network_ready.recv(ctx).await?; - s.spawn(async { - Config { - secret_key: validator_key, - validator_set, - block_store: node.block_store.clone(), - replica_store: Box::new(in_memory::ReplicaStore::default()), - payload_manager: node.behavior.payload_manager(), - } - .run(ctx, consensus_actor_pipe) - .await - .context("consensus.run()") - }); - node.run_executor(ctx, consensus_pipe, network_pipe) - .await - .context("executor.run()") - }) - .await - } - .instrument(tracing::info_span!("node", i)), - ); - } match network { Network::Real => { - for (i, node) in nodes.iter().enumerate() { - let state = node.net.state().clone(); - let pipe = network_pipes - .remove(&node.net.consensus_config().key.public()) - .unwrap(); + let mut nodes = vec![]; + for (i, spec) in specs.iter().enumerate() { + let (node, runner) = network::testonly::Instance::new( + spec.net.clone(), + spec.block_store.clone(), + ); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + nodes.push(node); + } + network::testonly::instant_network(ctx, nodes.iter()).await?; + for (i, node) in nodes.into_iter().enumerate() { + let spec = &specs[i]; s.spawn( async { - network::run_network(ctx, state, pipe) - .await - .context("network") + let mut node = 
node; + spec.run(ctx, node.pipe()).await } .instrument(tracing::info_span!("node", i)), ); } - network::testonly::instant_network(ctx, nodes.iter().map(|n| &n.net)).await?; - network_ready.send(); - Ok(()) } Network::Mock => { - for (key, pipe) in network_pipes { - network_send.insert(key.clone(), pipe.send); - network_recv.insert(key.clone(), pipe.recv); + let mut sends = HashMap::new(); + let mut recvs = vec![]; + for (i, spec) in specs.iter().enumerate() { + let (actor_pipe, pipe) = pipe::new(); + let key = spec.net.consensus.as_ref().unwrap().key.public(); + sends.insert(key, actor_pipe.send); + recvs.push(actor_pipe.recv); + s.spawn( + async { + let mut pipe = pipe; + spec.run(ctx, &mut pipe).await + } + .instrument(tracing::info_span!("node", i)), + ); } - for (_, recv) in network_recv { - s.spawn(async { - use zksync_consensus_network::io; - let mut recv = recv; - - while let Ok(io::InputMessage::Consensus(message)) = recv.recv(ctx).await { - let msg = || { - io::OutputMessage::Consensus(io::ConsensusReq { - msg: message.message.clone(), - ack: oneshot::channel().0, - }) - }; - match message.recipient { - io::Target::Validator(v) => { - network_send.get(&v).unwrap().send(msg()) - } - io::Target::Broadcast => { - network_send.values().for_each(|s| s.send(msg())) + scope::run!(ctx, |ctx, s| async { + for recv in recvs { + s.spawn(async { + use zksync_consensus_network::io; + let mut recv = recv; + while let Ok(io::InputMessage::Consensus(message)) = + recv.recv(ctx).await + { + let msg = || { + io::OutputMessage::Consensus(io::ConsensusReq { + msg: message.message.clone(), + ack: oneshot::channel().0, + }) + }; + match message.recipient { + io::Target::Validator(v) => sends.get(&v).unwrap().send(msg()), + io::Target::Broadcast => { + sends.values().for_each(|s| s.send(msg())) + } } } - } - Ok(()) - }); - } - network_ready.send(); - Ok(()) + Ok(()) + }); + } + anyhow::Ok(()) + }) + .await?; } } + Ok(()) }) .await } diff --git a/node/actors/bft/src/testonly/ut_harness.rs b/node/actors/bft/src/testonly/ut_harness.rs index e39f9086..f3d1b164 100644 --- a/node/actors/bft/src/testonly/ut_harness.rs +++ b/node/actors/bft/src/testonly/ut_harness.rs @@ -7,17 +7,21 @@ use crate::{ testonly, Config, PayloadManager, }; use assert_matches::assert_matches; -use rand::Rng; use std::{cmp::Ordering, sync::Arc}; use zksync_concurrency::ctx; -use zksync_consensus_network::io::ConsensusInputMessage; +use zksync_consensus_network as network; use zksync_consensus_roles::validator::{ - self, CommitQC, LeaderCommit, LeaderPrepare, Payload, Phase, PrepareQC, ReplicaCommit, - ReplicaPrepare, SecretKey, Signed, ViewNumber, + self, CommitQC, LeaderCommit, LeaderPrepare, Phase, PrepareQC, ReplicaCommit, ReplicaPrepare, + SecretKey, Signed, ViewNumber, +}; +use zksync_consensus_storage::{ + testonly::{in_memory, new_store}, + BlockStoreRunner, }; -use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; use zksync_consensus_utils::enum_util::Variant; +pub(crate) const MAX_PAYLOAD_SIZE: usize = 1000; + /// `UTHarness` provides various utilities for unit tests. /// It is designed to simplify the setup and execution of test cases by encapsulating /// common testing functionality. 
@@ -37,7 +41,12 @@ impl UTHarness { ctx: &ctx::Ctx, num_validators: usize, ) -> (UTHarness, BlockStoreRunner) { - Self::new_with_payload(ctx, num_validators, Box::new(testonly::RandomPayload)).await + Self::new_with_payload( + ctx, + num_validators, + Box::new(testonly::RandomPayload(MAX_PAYLOAD_SIZE)), + ) + .await } pub(crate) async fn new_with_payload( @@ -45,23 +54,18 @@ impl UTHarness { num_validators: usize, payload_manager: Box, ) -> (UTHarness, BlockStoreRunner) { - let mut rng = ctx.rng(); - let keys: Vec<_> = (0..num_validators).map(|_| rng.gen()).collect(); - let (genesis, validator_set) = - crate::testonly::make_genesis(&keys, Payload(vec![]), validator::BlockNumber(0)); - - // Initialize the storage. - let block_store = Box::new(in_memory::BlockStore::new(genesis)); - let (block_store, runner) = BlockStore::new(ctx, block_store).await.unwrap(); - // Create the pipe. + let rng = &mut ctx.rng(); + let setup = validator::testonly::GenesisSetup::new(rng, num_validators); + let (block_store, runner) = new_store(ctx, &setup.blocks[0]).await; let (send, recv) = ctx::channel::unbounded(); let cfg = Arc::new(Config { - secret_key: keys[0].clone(), - validator_set, + secret_key: setup.keys[0].clone(), + validator_set: setup.validator_set(), block_store: block_store.clone(), replica_store: Box::new(in_memory::ReplicaStore::default()), payload_manager, + max_payload_size: MAX_PAYLOAD_SIZE, }); let leader = leader::StateMachine::new(ctx, cfg.clone(), send.clone()); let replica = replica::StateMachine::start(ctx, cfg.clone(), send.clone()) @@ -71,7 +75,7 @@ impl UTHarness { leader, replica, pipe: recv, - keys, + keys: setup.keys, }; let _: Signed = this.try_recv().unwrap(); (this, runner) @@ -264,7 +268,7 @@ impl UTHarness { fn try_recv>(&mut self) -> Option> { self.pipe.try_recv().map(|message| match message { - OutputMessage::Network(ConsensusInputMessage { message, .. }) => { + OutputMessage::Network(network::io::ConsensusInputMessage { message, .. }) => { message.cast().unwrap() } }) diff --git a/node/actors/executor/Cargo.toml b/node/actors/executor/Cargo.toml index 2746734e..3fec785b 100644 --- a/node/actors/executor/Cargo.toml +++ b/node/actors/executor/Cargo.toml @@ -15,6 +15,7 @@ zksync_consensus_roles.workspace = true zksync_consensus_storage.workspace = true zksync_consensus_sync_blocks.workspace = true zksync_consensus_utils.workspace = true +zksync_protobuf.workspace = true anyhow.workspace = true rand.workspace = true diff --git a/node/actors/executor/src/lib.rs b/node/actors/executor/src/lib.rs index 04d8a294..08065a2a 100644 --- a/node/actors/executor/src/lib.rs +++ b/node/actors/executor/src/lib.rs @@ -6,13 +6,14 @@ use std::{ fmt, sync::Arc, }; -use zksync_concurrency::{ctx, net, scope, sync}; +use zksync_concurrency::{ctx, net, scope}; use zksync_consensus_bft as bft; use zksync_consensus_network as network; use zksync_consensus_roles::{node, validator}; -use zksync_consensus_storage::{BlockStore, BlockStoreState, ReplicaStore}; +use zksync_consensus_storage::{BlockStore, ReplicaStore}; use zksync_consensus_sync_blocks as sync_blocks; use zksync_consensus_utils::pipe; +use zksync_protobuf::kB; mod io; pub mod testonly; @@ -48,13 +49,15 @@ pub struct Config { /// Static specification of validators for Proof of Authority. Should be deprecated once we move /// to Proof of Stake. pub validators: validator::ValidatorSet, + /// Maximal size of the block payload. + pub max_payload_size: usize, /// Key of this node. It uniquely identifies the node. 
/// It should match the secret key provided in the `node_key` file. pub node_key: node::SecretKey, /// Limit on the number of inbound connections outside /// of the `static_inbound` set. - pub gossip_dynamic_inbound_limit: u64, + pub gossip_dynamic_inbound_limit: usize, /// Inbound connections that should be unconditionally accepted. pub gossip_static_inbound: HashSet, /// Outbound connections that the node should actively try to @@ -70,7 +73,6 @@ impl Config { dynamic_inbound_limit: self.gossip_dynamic_inbound_limit, static_inbound: self.gossip_static_inbound.clone(), static_outbound: self.gossip_static_outbound.clone(), - enable_pings: true, } } } @@ -86,14 +88,6 @@ pub struct Executor { pub validator: Option, } -/// Converts BlockStoreState to isomorphic network::io::SyncState. -fn to_sync_state(state: BlockStoreState) -> network::io::SyncState { - network::io::SyncState { - first_stored_block: state.first, - last_stored_block: state.last, - } -} - impl Executor { /// Extracts a network crate config. fn network_config(&self) -> network::Config { @@ -102,6 +96,8 @@ impl Executor { validators: self.config.validators.clone(), gossip: self.config.gossip(), consensus: self.validator.as_ref().map(|v| v.config.clone()), + enable_pings: true, + max_block_size: self.config.max_payload_size.saturating_add(kB), } } @@ -138,23 +134,12 @@ impl Executor { // Create each of the actors. let validator_set = self.config.validators; - let mut block_store_state = self.block_store.subscribe(); - let sync_state = sync::watch::channel(to_sync_state(block_store_state.borrow().clone())).0; tracing::debug!("Starting actors in separate threads."); scope::run!(ctx, |ctx, s| async { - s.spawn_bg(async { - // Task forwarding changes from block_store_state to sync_state. - // Alternatively we can make network depend on the storage directly. - while let Ok(state) = sync::changed(ctx, &mut block_store_state).await { - sync_state.send_replace(to_sync_state(state.clone())); - } - Ok(()) - }); - s.spawn_blocking(|| dispatcher.run(ctx).context("IO Dispatcher stopped")); s.spawn(async { - let state = network::State::new(network_config, None, Some(sync_state.subscribe())) + let state = network::State::new(network_config, self.block_store.clone(), None) .context("Invalid network config")?; state.register_metrics(); network::run_network(ctx, state, network_actor_pipe) @@ -170,6 +155,7 @@ impl Executor { block_store: self.block_store.clone(), replica_store: validator.replica_store, payload_manager: validator.payload_manager, + max_payload_size: self.config.max_payload_size, } .run(ctx, consensus_actor_pipe) .await diff --git a/node/actors/executor/src/testonly.rs b/node/actors/executor/src/testonly.rs index e40b4d69..b40caa66 100644 --- a/node/actors/executor/src/testonly.rs +++ b/node/actors/executor/src/testonly.rs @@ -2,8 +2,8 @@ use crate::{Config, ValidatorConfig}; use rand::Rng; use zksync_concurrency::net; -use zksync_consensus_network::testonly::Instance; -use zksync_consensus_roles::validator; +use zksync_consensus_network as network; +use zksync_consensus_roles::validator::testonly::GenesisSetup; /// Full validator configuration. #[derive(Debug, Clone)] @@ -13,6 +13,8 @@ pub struct ValidatorNode { pub node: Config, /// Consensus configuration of the validator. pub validator: ValidatorConfig, + /// Genesis configuration (validator set & initial blocks). + pub setup: GenesisSetup, } /// Creates a new full node and configures this validator to accept incoming connections from it. 
@@ -27,20 +29,25 @@ pub fn connect_full_node(rng: &mut impl Rng, node: &mut Config) -> Config { impl ValidatorNode { /// Generates a validator config for a network with a single validator. - pub fn for_single_validator(rng: &mut impl Rng) -> Self { - let net_config = Instance::new_configs(rng, 1, 0).pop().unwrap(); + pub fn new(rng: &mut impl Rng) -> Self { + let setup = GenesisSetup::new(rng, 1); + let net_config = network::testonly::new_configs(rng, &setup, 0) + .pop() + .unwrap(); let validator = net_config.consensus.unwrap(); let gossip = net_config.gossip; Self { node: Config { server_addr: *net_config.server_addr, - validators: validator::ValidatorSet::new([validator.key.public()]).unwrap(), + validators: setup.validator_set(), node_key: gossip.key, gossip_dynamic_inbound_limit: gossip.dynamic_inbound_limit, gossip_static_inbound: gossip.static_inbound, gossip_static_outbound: gossip.static_outbound, + max_payload_size: usize::MAX, }, validator, + setup, } } } diff --git a/node/actors/executor/src/tests.rs b/node/actors/executor/src/tests.rs index ac3baf3b..d85ca660 100644 --- a/node/actors/executor/src/tests.rs +++ b/node/actors/executor/src/tests.rs @@ -2,19 +2,18 @@ use super::*; use crate::testonly::{connect_full_node, ValidatorNode}; -use rand::Rng; -use std::iter; use test_casing::test_casing; -use zksync_concurrency::{sync, testonly::abort_on_panic, time}; -use zksync_consensus_bft::{testonly, PROTOCOL_VERSION}; -use zksync_consensus_roles::validator::{BlockNumber, FinalBlock, Payload}; -use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; - -async fn make_store(ctx: &ctx::Ctx, genesis: FinalBlock) -> (Arc, BlockStoreRunner) { - BlockStore::new(ctx, Box::new(in_memory::BlockStore::new(genesis))) - .await - .unwrap() -} +use zksync_concurrency::{ + sync, + testonly::{abort_on_panic, set_timeout}, + time, +}; +use zksync_consensus_bft as bft; +use zksync_consensus_roles::validator::BlockNumber; +use zksync_consensus_storage::{ + testonly::{in_memory, new_store}, + BlockStore, +}; impl Config { fn into_executor(self, block_store: Arc) -> Executor { @@ -27,30 +26,6 @@ impl Config { } impl ValidatorNode { - fn gen_blocks<'a>(&'a self, rng: &'a mut impl Rng) -> impl 'a + Iterator { - let (genesis_block, _) = testonly::make_genesis( - &[self.validator.key.clone()], - Payload(vec![]), - BlockNumber(0), - ); - let validators = &self.node.validators; - iter::successors(Some(genesis_block), |parent| { - let payload: Payload = rng.gen(); - let header = validator::BlockHeader { - parent: parent.header().hash(), - number: parent.header().number.next(), - payload: payload.hash(), - }; - let commit = self.validator.key.sign_msg(validator::ReplicaCommit { - protocol_version: PROTOCOL_VERSION, - view: validator::ViewNumber(header.number.0), - proposal: header, - }); - let justification = validator::CommitQC::from(&[commit], validators).unwrap(); - Some(FinalBlock::new(payload, justification)) - }) - } - fn into_executor(self, block_store: Arc) -> Executor { Executor { config: self.node, @@ -58,7 +33,7 @@ impl ValidatorNode { validator: Some(Validator { config: self.validator, replica_store: Box::new(in_memory::ReplicaStore::default()), - payload_manager: Box::new(testonly::RandomPayload), + payload_manager: Box::new(bft::testonly::RandomPayload(1000)), }), } } @@ -70,9 +45,8 @@ async fn executing_single_validator() { let ctx = &ctx::root(); let rng = &mut ctx.rng(); - let validator = ValidatorNode::for_single_validator(rng); - let genesis_block = 
validator.gen_blocks(rng).next().unwrap(); - let (storage, runner) = make_store(ctx, genesis_block.clone()).await; + let validator = ValidatorNode::new(rng); + let (storage, runner) = new_store(ctx, &validator.setup.blocks[0]).await; let executor = validator.into_executor(storage.clone()); scope::run!(ctx, |ctx, s| async { @@ -92,12 +66,10 @@ async fn executing_validator_and_full_node() { let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); let rng = &mut ctx.rng(); - let mut validator = ValidatorNode::for_single_validator(rng); + let mut validator = ValidatorNode::new(rng); let full_node = connect_full_node(rng, &mut validator.node); - - let genesis_block = validator.gen_blocks(rng).next().unwrap(); - let (validator_storage, validator_runner) = make_store(ctx, genesis_block.clone()).await; - let (full_node_storage, full_node_runner) = make_store(ctx, genesis_block.clone()).await; + let (validator_storage, validator_runner) = new_store(ctx, &validator.setup.blocks[0]).await; + let (full_node_storage, full_node_runner) = new_store(ctx, &validator.setup.blocks[0]).await; let validator = validator.into_executor(validator_storage.clone()); let full_node = full_node.into_executor(full_node_storage.clone()); @@ -107,11 +79,9 @@ async fn executing_validator_and_full_node() { s.spawn_bg(full_node_runner.run(ctx)); s.spawn_bg(validator.run(ctx)); s.spawn_bg(full_node.run(ctx)); - let want = BlockNumber(5); - sync::wait_for(ctx, &mut full_node_storage.subscribe(), |state| { - state.next() > want - }) - .await?; + full_node_storage + .wait_until_persisted(ctx, BlockNumber(5)) + .await?; Ok(()) }) .await @@ -122,64 +92,56 @@ async fn executing_validator_and_full_node() { #[tokio::test] async fn syncing_full_node_from_snapshot(delay_block_storage: bool) { abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(10)); + let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); let rng = &mut ctx.rng(); - let mut validator = ValidatorNode::for_single_validator(rng); - let full_node = connect_full_node(rng, &mut validator.node); - - let blocks: Vec<_> = validator.gen_blocks(rng).take(11).collect(); - let (validator_storage, validator_runner) = make_store(ctx, blocks[0].clone()).await; - let validator = validator.node.into_executor(validator_storage.clone()); + let mut validator = ValidatorNode::new(rng); + validator.setup.push_blocks(rng, 10); + let node2 = connect_full_node(rng, &mut validator.node); - // Start a full node from a snapshot. - let (full_node_storage, full_node_runner) = make_store(ctx, blocks[4].clone()).await; + let (store1, store1_runner) = new_store(ctx, &validator.setup.blocks[0]).await; + // Node2 will start from a snapshot. + let (store2, store2_runner) = new_store(ctx, &validator.setup.blocks[4]).await; - let full_node = Executor { - config: full_node, - block_store: full_node_storage.clone(), + // We spawn 2 non-validator nodes. We will simulate blocks appearing in storage of node1, + // and will expect them to be propagated to node2. + let node1 = validator.node.into_executor(store1.clone()); + let node2 = Executor { + config: node2, + block_store: store2.clone(), validator: None, }; scope::run!(ctx, |ctx, s| async { - s.spawn_bg(validator_runner.run(ctx)); - s.spawn_bg(full_node_runner.run(ctx)); + s.spawn_bg(store1_runner.run(ctx)); + s.spawn_bg(store2_runner.run(ctx)); if !delay_block_storage { // Instead of running consensus on the validator, add the generated blocks manually. 
- for block in &blocks { - validator_storage - .queue_block(ctx, block.clone()) - .await - .unwrap(); + for block in &validator.setup.blocks[1..] { + store1.queue_block(ctx, block.clone()).await.unwrap(); } } - s.spawn_bg(validator.run(ctx)); - s.spawn_bg(full_node.run(ctx)); + s.spawn_bg(node1.run(ctx)); + s.spawn_bg(node2.run(ctx)); if delay_block_storage { // Emulate the validator gradually adding new blocks to the storage. s.spawn_bg(async { - for block in &blocks[1..] { + for block in &validator.setup.blocks[1..] { ctx.sleep(time::Duration::milliseconds(500)).await?; - validator_storage.queue_block(ctx, block.clone()).await?; + store1.queue_block(ctx, block.clone()).await?; } Ok(()) }); } - sync::wait_for(ctx, &mut full_node_storage.subscribe(), |state| { - let last = state.last.header().number; - tracing::trace!(%last, "Full node updated last block"); - last >= BlockNumber(10) - }) - .await - .unwrap(); + store2.wait_until_persisted(ctx, BlockNumber(10)).await?; // Check that the node didn't receive any blocks with number lesser than the initial snapshot block. for lesser_block_number in 0..3 { - let block = full_node_storage - .block(ctx, BlockNumber(lesser_block_number)) - .await?; + let block = store2.block(ctx, BlockNumber(lesser_block_number)).await?; assert!(block.is_none()); } anyhow::Ok(()) diff --git a/node/actors/network/Cargo.toml b/node/actors/network/Cargo.toml index 3f9bb95c..0edd257c 100644 --- a/node/actors/network/Cargo.toml +++ b/node/actors/network/Cargo.toml @@ -10,6 +10,7 @@ license.workspace = true zksync_concurrency.workspace = true zksync_consensus_crypto.workspace = true zksync_consensus_roles.workspace = true +zksync_consensus_storage.workspace = true zksync_consensus_utils.workspace = true zksync_protobuf.workspace = true diff --git a/node/actors/network/src/consensus/handshake/mod.rs b/node/actors/network/src/consensus/handshake/mod.rs index f6218cd9..9536c2d3 100644 --- a/node/actors/network/src/consensus/handshake/mod.rs +++ b/node/actors/network/src/consensus/handshake/mod.rs @@ -65,7 +65,7 @@ pub(super) async fn outbound( ) .await .map_err(Error::Stream)?; - let h: Handshake = frame::recv_proto(ctx, stream) + let h: Handshake = frame::recv_proto(ctx, stream, Handshake::max_size()) .await .map_err(Error::Stream)?; if h.session_id.msg != session_id { @@ -85,7 +85,7 @@ pub(super) async fn inbound( ) -> Result { let ctx = &ctx.with_timeout(TIMEOUT); let session_id = node::SessionId(stream.id().encode()); - let h: Handshake = frame::recv_proto(ctx, stream) + let h: Handshake = frame::recv_proto(ctx, stream, Handshake::max_size()) .await .map_err(Error::Stream)?; if h.session_id.msg != session_id.clone() { diff --git a/node/actors/network/src/consensus/handshake/tests.rs b/node/actors/network/src/consensus/handshake/tests.rs index 747844fe..23bd75cd 100644 --- a/node/actors/network/src/consensus/handshake/tests.rs +++ b/node/actors/network/src/consensus/handshake/tests.rs @@ -8,7 +8,7 @@ use zksync_protobuf::testonly::test_encode_random; #[test] fn test_schema_encode_decode() { let rng = &mut ctx::test_root(&ctx::RealClock).rng(); - test_encode_random::<_, Handshake>(rng); + test_encode_random::(rng); } #[tokio::test] @@ -58,7 +58,7 @@ async fn test_session_id_mismatch() { let (mut s1, s2) = noise::testonly::pipe(ctx).await; s.spawn_bg(async { let mut s2 = s2; - let _: Handshake = frame::recv_proto(ctx, &mut s2).await?; + let _: Handshake = frame::recv_proto(ctx, &mut s2, Handshake::max_size()).await?; frame::send_proto( ctx, &mut s2, @@ -122,7 +122,7 @@ 
async fn test_invalid_signature() { let (mut s0, s1) = noise::testonly::pipe(ctx).await; s.spawn_bg(async { let mut s1 = s1; - let mut h: Handshake = frame::recv_proto(ctx, &mut s1).await?; + let mut h: Handshake = frame::recv_proto(ctx, &mut s1, Handshake::max_size()).await?; h.session_id.key = key1.public(); frame::send_proto(ctx, &mut s1, &h).await?; Ok(()) diff --git a/node/actors/network/src/consensus/runner.rs b/node/actors/network/src/consensus/runner.rs index 9d1c9995..ca988b0c 100644 --- a/node/actors/network/src/consensus/runner.rs +++ b/node/actors/network/src/consensus/runner.rs @@ -6,6 +6,7 @@ use anyhow::Context as _; use std::{collections::HashMap, sync::Arc}; use zksync_concurrency::{ctx, ctx::channel, oneshot, scope, sync, time}; use zksync_consensus_roles::validator; +use zksync_protobuf::kB; /// How often we should retry to establish a connection to a validator. /// TODO(gprusak): once it becomes relevant, choose a more appropriate retry strategy. @@ -19,18 +20,29 @@ const PING_TIMEOUT: time::Duration = time::Duration::seconds(10); /// changes. That requires tighter integration with the consensus. const MSG_TIMEOUT: time::Duration = time::Duration::seconds(10); +struct Server { + out: channel::UnboundedSender, + max_block_size: usize, +} + #[async_trait::async_trait] -impl rpc::Handler for channel::UnboundedSender { +impl rpc::Handler for Server { + /// Here we bound the buffering of incoming consensus messages. + fn max_req_size(&self) -> usize { + self.max_block_size.saturating_add(kB) + } + async fn handle( &self, ctx: &ctx::Ctx, req: rpc::consensus::Req, ) -> anyhow::Result { let (send, recv) = oneshot::channel(); - self.send(io::OutputMessage::Consensus(io::ConsensusReq { - msg: req.0, - ack: send, - })); + self.out + .send(io::OutputMessage::Consensus(io::ConsensusReq { + msg: req.0, + ack: send, + })); recv.recv_or_disconnected(ctx).await??; Ok(rpc::consensus::Resp) } @@ -40,25 +52,36 @@ impl rpc::Handler for channel::UnboundedSender, mut stream: noise::Stream, ) -> anyhow::Result<()> { - let peer = handshake::inbound(ctx, &state.cfg.key, &mut stream).await?; - state.inbound.insert(peer.clone()).await?; - let ping_client = rpc::Client::::new(ctx); + let consensus_state = state + .consensus + .as_ref() + .context("Node does not accept consensus network connections")?; + let peer = handshake::inbound(ctx, &consensus_state.cfg.key, &mut stream).await?; + consensus_state.inbound.insert(peer.clone()).await?; let res = scope::run!(ctx, |ctx, s| async { - s.spawn(ping_client.ping_loop(ctx, PING_TIMEOUT)); - rpc::Service::new() - .add_client(&ping_client) + let mut service = rpc::Service::new() .add_server(rpc::ping::Server) - .add_server(sender.clone()) - .run(ctx, stream) - .await?; + .add_server(Server { + out: sender.clone(), + max_block_size: state.cfg.max_block_size, + }); + if state.cfg.enable_pings { + let ping_client = rpc::Client::::new(ctx); + service = service.add_client(&ping_client); + s.spawn(async { + let ping_client = ping_client; + ping_client.ping_loop(ctx, PING_TIMEOUT).await + }); + } + service.run(ctx, stream).await?; Ok(()) }) .await; - state.inbound.remove(&peer).await; + consensus_state.inbound.remove(&peer).await; res } @@ -101,6 +124,7 @@ pub(crate) async fn run_client( .iter() .map(|peer| (peer.clone(), rpc::Client::::new(ctx))) .collect(); + scope::run!(ctx, |ctx, s| async { // Spawn outbound connections. 
for (peer, client) in &clients { @@ -135,7 +159,9 @@ pub(crate) async fn run_client( let client = clients.get(&val).context("unknown validator")?; s.spawn(async { let req = rpc::consensus::Req(msg.message); - if let Err(err) = client.call(&ctx.with_timeout(MSG_TIMEOUT), &req).await { + if let Err(err) = + client.call(&ctx.with_timeout(MSG_TIMEOUT), &req, kB).await + { tracing::info!("client.consensus(): {err:#}"); } Ok(()) @@ -148,7 +174,7 @@ pub(crate) async fn run_client( s.spawn(async { let req = req; if let Err(err) = - client.call(&ctx.with_timeout(MSG_TIMEOUT), &req).await + client.call(&ctx.with_timeout(MSG_TIMEOUT), &req, kB).await { tracing::info!("client.consensus(): {err:#}"); } diff --git a/node/actors/network/src/consensus/tests.rs b/node/actors/network/src/consensus/tests.rs index a0b6be73..fcf8c574 100644 --- a/node/actors/network/src/consensus/tests.rs +++ b/node/actors/network/src/consensus/tests.rs @@ -1,38 +1,35 @@ use super::*; -use crate::{io, preface, rpc, run_network, testonly}; -use anyhow::Context as _; +use crate::{io, preface, rpc, testonly}; use rand::Rng; use tracing::Instrument as _; use zksync_concurrency::{ctx, net, scope, testonly::abort_on_panic}; use zksync_consensus_roles::validator; -use zksync_consensus_utils::pipe; +use zksync_consensus_storage::testonly::new_store; #[tokio::test] async fn test_one_connection_per_validator() { abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - - let mut nodes = testonly::Instance::new(rng, 3, 1); + let setup = validator::testonly::GenesisSetup::new(rng, 3); + let nodes = testonly::new_configs(rng, &setup, 1); scope::run!(ctx, |ctx,s| async { - for (i, node) in nodes.iter().enumerate() { - let (network_pipe, _) = pipe::new(); - - s.spawn_bg(run_network( - ctx, - node.state.clone(), - network_pipe - ).instrument(tracing::info_span!("node", i))); - } + let (store,runner) = new_store(ctx,&setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let nodes : Vec<_> = nodes.into_iter().enumerate().map(|(i,node)| { + let (node,runner) = testonly::Instance::new(node, store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }).collect(); tracing::info!("waiting for all gossip to be established"); - for node in &mut nodes { + for node in &nodes { node.wait_for_gossip_connections().await; } tracing::info!("waiting for all connections to be established"); - for node in &mut nodes { + for node in &nodes { node.wait_for_consensus_connections().await; } @@ -70,45 +67,29 @@ async fn test_address_change() { let ctx = &ctx::test_root(&ctx::AffineClock::new(20.)); let rng = &mut ctx.rng(); - let mut nodes = testonly::Instance::new(rng, 5, 1); + let setup = validator::testonly::GenesisSetup::new(rng, 5); + let mut cfgs = testonly::new_configs(rng, &setup, 1); scope::run!(ctx, |ctx, s| async { - for (i, n) in nodes.iter().enumerate().skip(1) { - let (pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, n.state.clone(), pipe).instrument(tracing::info_span!("node", i)), - ); + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let mut nodes: Vec<_> = cfgs + .iter() + .enumerate() + .map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }) + .collect(); + for n in &nodes { + n.wait_for_consensus_connections().await; } - - scope::run!(ctx, |ctx, s| async { - let (pipe, _) = pipe::new(); - 
s.spawn_bg( - run_network(ctx, nodes[0].state.clone(), pipe) - .instrument(tracing::info_span!("node0")), - ); - for n in &nodes { - n.wait_for_consensus_connections().await; - } - Ok(()) - }) - .await - .context("run1")?; + nodes[0].terminate(ctx).await?; // All nodes should lose connection to node[0]. let key0 = nodes[0].consensus_config().key.public(); for node in &nodes { - let consensus_state = node.state.consensus.as_ref().unwrap(); - consensus_state - .inbound - .subscribe() - .wait_for(|got| !got.current().contains(&key0)) - .await - .unwrap(); - consensus_state - .outbound - .subscribe() - .wait_for(|got| !got.current().contains(&key0)) - .await - .unwrap(); + node.wait_for_consensus_disconnect(ctx, &key0).await?; } // Change the address of node[0]. @@ -116,24 +97,14 @@ async fn test_address_change() { // node[0] is expected to connect to its gossip peers. // Then it should broadcast its new address and the consensus network // should get reconstructed. - let mut cfg = nodes[0].to_config(); - cfg.server_addr = net::tcp::testonly::reserve_listener(); - cfg.consensus.as_mut().unwrap().public_addr = *cfg.server_addr; - nodes[0] = testonly::Instance::from_cfg(cfg); - - scope::run!(ctx, |ctx, s| async { - let (pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, nodes[0].state.clone(), pipe) - .instrument(tracing::info_span!("node0")), - ); - for n in &nodes { - n.wait_for_consensus_connections().await; - } - Ok(()) - }) - .await - .context("run2")?; + cfgs[0].server_addr = net::tcp::testonly::reserve_listener(); + cfgs[0].consensus.as_mut().unwrap().public_addr = *cfgs[0].server_addr; + let (node0, runner) = testonly::Instance::new(cfgs[0].clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node0"))); + nodes[0] = node0; + for n in &nodes { + n.wait_for_consensus_connections().await; + } Ok(()) }) .await @@ -148,18 +119,22 @@ async fn test_transmission() { let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut nodes = testonly::Instance::new(rng, 2, 1); + let setup = validator::testonly::GenesisSetup::new(rng, 2); + let cfgs = testonly::new_configs(rng, &setup, 1); scope::run!(ctx, |ctx, s| async { - let mut pipes = vec![]; - for (i, n) in nodes.iter().enumerate() { - let (network_pipe, pipe) = pipe::new(); - pipes.push(pipe); - s.spawn_bg( - run_network(ctx, n.state.clone(), network_pipe) - .instrument(tracing::info_span!("node", i)), - ); - } + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let mut nodes: Vec<_> = cfgs + .iter() + .enumerate() + .map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }) + .collect(); + tracing::info!("waiting for all connections to be established"); for n in &mut nodes { n.wait_for_consensus_connections().await; @@ -170,10 +145,10 @@ async fn test_transmission() { message: want.clone(), recipient: io::Target::Validator(nodes[1].consensus_config().key.public()), }; - pipes[0].send(in_message.into()); + nodes[0].pipe.send(in_message.into()); let got = loop { - let output_message = pipes[1].recv(ctx).await.unwrap(); + let output_message = nodes[1].pipe.recv(ctx).await.unwrap(); let io::OutputMessage::Consensus(got) = output_message else { continue; }; diff --git a/node/actors/network/src/event.rs b/node/actors/network/src/event.rs index 68d9ab6a..adf792e2 100644 --- a/node/actors/network/src/event.rs +++ 
b/node/actors/network/src/event.rs @@ -1,7 +1,6 @@ //! Mechanism for network State to report internal events. //! It is used in tests to await a specific state. use crate::State; -use zksync_consensus_roles::{node, validator}; impl State { /// Sends an event to the `self.events` channel. @@ -13,20 +12,10 @@ impl State { } } -#[derive(Debug)] -pub enum StreamEvent { - InboundOpened(Key), - InboundClosed(Key), - OutboundOpened(Key), - OutboundClosed(Key), -} - /// Events observable in tests. /// Feel free to extend this enum if you need to /// write a test awaiting some specific event/state. #[derive(Debug)] pub enum Event { - Consensus(StreamEvent), - Gossip(StreamEvent), ValidatorAddrsUpdated, } diff --git a/node/actors/network/src/frame.rs b/node/actors/network/src/frame.rs index c20969f5..ef40bcaa 100644 --- a/node/actors/network/src/frame.rs +++ b/node/actors/network/src/frame.rs @@ -10,6 +10,7 @@ use zksync_concurrency::{ctx, io}; pub(crate) async fn mux_recv_proto( ctx: &ctx::Ctx, stream: &mut mux::ReadStream, + max_size: usize, ) -> anyhow::Result<(T, usize)> { let mut msg_size = bytes::Buffer::new(4); stream.read_exact(ctx, &mut msg_size).await?; @@ -17,8 +18,8 @@ pub(crate) async fn mux_recv_proto( anyhow::bail!("end of stream"); } let msg_size = u32::from_le_bytes(msg_size.prefix()) as usize; - if msg_size > T::max_size() { - anyhow::bail!("message too large"); + if msg_size > max_size { + anyhow::bail!("message too large: max = {}B, got {msg_size}B", max_size); } let mut msg = bytes::Buffer::new(msg_size); stream.read_exact(ctx, &mut msg).await?; @@ -38,9 +39,8 @@ pub(crate) async fn mux_send_proto( msg: &T, ) -> anyhow::Result { let msg = zksync_protobuf::encode(msg); - assert!(msg.len() <= T::max_size(), "message too large"); stream - .write_all(ctx, &u32::to_le_bytes(msg.len() as u32)) + .write_all(ctx, &u32::to_le_bytes(msg.len().try_into()?)) .await?; stream.write_all(ctx, &msg).await?; Ok(msg.len()) @@ -52,13 +52,15 @@ pub(crate) async fn mux_send_proto( pub(crate) async fn recv_proto( ctx: &ctx::Ctx, stream: &mut S, + max_size: usize, ) -> anyhow::Result { let mut msg_size = [0u8; 4]; io::read_exact(ctx, stream, &mut msg_size).await??; let msg_size = u32::from_le_bytes(msg_size); - if msg_size as usize > T::max_size() { - anyhow::bail!("message too large"); - } + anyhow::ensure!( + msg_size as usize <= max_size, + "message too large: max = {max_size}, got {msg_size}", + ); let mut msg = vec![0u8; msg_size as usize]; io::read_exact(ctx, stream, &mut msg[..]).await??; zksync_protobuf::decode(&msg) @@ -71,8 +73,7 @@ pub(crate) async fn send_proto anyhow::Result<()> { let msg = zksync_protobuf::encode(msg); - assert!(msg.len() <= T::max_size(), "message too large"); - io::write_all(ctx, stream, &u32::to_le_bytes(msg.len() as u32)).await??; + io::write_all(ctx, stream, &u32::to_le_bytes(msg.len().try_into()?)).await??; io::write_all(ctx, stream, &msg).await??; io::flush(ctx, stream).await??; Ok(()) diff --git a/node/actors/network/src/gossip/handshake/mod.rs b/node/actors/network/src/gossip/handshake/mod.rs index e823baa0..f5d7b22e 100644 --- a/node/actors/network/src/gossip/handshake/mod.rs +++ b/node/actors/network/src/gossip/handshake/mod.rs @@ -72,7 +72,7 @@ pub(super) async fn outbound( ) .await .map_err(Error::Stream)?; - let h: Handshake = frame::recv_proto(ctx, stream) + let h: Handshake = frame::recv_proto(ctx, stream, Handshake::max_size()) .await .map_err(Error::Stream)?; if h.session_id.msg != session_id { @@ -92,7 +92,7 @@ pub(super) async fn inbound( ) -> Result { 
let ctx = &ctx.with_timeout(TIMEOUT); let session_id = node::SessionId(stream.id().encode()); - let h: Handshake = frame::recv_proto(ctx, stream) + let h: Handshake = frame::recv_proto(ctx, stream, Handshake::max_size()) .await .map_err(Error::Stream)?; if h.session_id.msg != session_id { diff --git a/node/actors/network/src/gossip/handshake/tests.rs b/node/actors/network/src/gossip/handshake/tests.rs index 457eadd3..2e024861 100644 --- a/node/actors/network/src/gossip/handshake/tests.rs +++ b/node/actors/network/src/gossip/handshake/tests.rs @@ -9,7 +9,7 @@ use zksync_protobuf::testonly::test_encode_random; #[test] fn test_schema_encode_decode() { let rng = &mut ctx::test_root(&ctx::RealClock).rng(); - test_encode_random::<_, Handshake>(rng); + test_encode_random::(rng); } fn make_cfg(rng: &mut R) -> Config { @@ -18,7 +18,6 @@ fn make_cfg(rng: &mut R) -> Config { dynamic_inbound_limit: 0, static_inbound: HashSet::default(), static_outbound: HashMap::default(), - enable_pings: true, } } @@ -69,7 +68,7 @@ async fn test_session_id_mismatch() { let (mut s1, s2) = noise::testonly::pipe(ctx).await; s.spawn_bg(async { let mut s2 = s2; - let _: Handshake = frame::recv_proto(ctx, &mut s2).await?; + let _: Handshake = frame::recv_proto(ctx, &mut s2, Handshake::max_size()).await?; frame::send_proto( ctx, &mut s2, @@ -134,7 +133,7 @@ async fn test_invalid_signature() { let (mut s0, s1) = noise::testonly::pipe(ctx).await; s.spawn_bg(async { let mut s1 = s1; - let mut h: Handshake = frame::recv_proto(ctx, &mut s1).await?; + let mut h: Handshake = frame::recv_proto(ctx, &mut s1, Handshake::max_size()).await?; h.session_id.key = cfg1.key.public(); frame::send_proto(ctx, &mut s1, &h).await?; Ok(()) diff --git a/node/actors/network/src/gossip/runner.rs b/node/actors/network/src/gossip/runner.rs index f614e663..744b7b5b 100644 --- a/node/actors/network/src/gossip/runner.rs +++ b/node/actors/network/src/gossip/runner.rs @@ -1,20 +1,14 @@ use super::{handshake, ValidatorAddrs}; -use crate::{ - consensus, - event::{Event, StreamEvent}, - io, noise, preface, rpc, State, -}; -use anyhow::Context; +use crate::{consensus, event::Event, io, noise, preface, rpc, State}; use async_trait::async_trait; use std::sync::Arc; -use tracing::Instrument as _; use zksync_concurrency::{ ctx::{self, channel}, - oneshot, scope, - sync::{self, watch}, - time, + oneshot, scope, sync, time, }; use zksync_consensus_roles::{node, validator}; +use zksync_consensus_storage::BlockStore; +use zksync_protobuf::kB; /// How often we should retry to establish a connection to a validator. /// TODO(gprusak): once it becomes relevant, choose a more appropriate retry strategy. @@ -28,116 +22,67 @@ const PING_TIMEOUT: time::Duration = time::Duration::seconds(5); /// is down. const ADDRESS_ANNOUNCER_INTERVAL: time::Duration = time::Duration::minutes(10); -struct ValidatorAddrsServer<'a> { - state: &'a State, - peer_validator_addrs: sync::Mutex, -} - -impl<'a> ValidatorAddrsServer<'a> { - fn new(state: &'a State) -> Self { - Self { - state, - peer_validator_addrs: sync::Mutex::default(), - } - } -} +struct PushValidatorAddrsServer<'a>(&'a State); #[async_trait] -impl rpc::Handler for ValidatorAddrsServer<'_> { +impl rpc::Handler for PushValidatorAddrsServer<'_> { + fn max_req_size(&self) -> usize { + 100 * kB + } async fn handle( &self, - ctx: &ctx::Ctx, - _req: rpc::sync_validator_addrs::Req, - ) -> anyhow::Result { - let mut old = sync::lock(ctx, &self.peer_validator_addrs) - .await? 
- .into_async(); - let mut sub = self.state.gossip.validator_addrs.subscribe(); - loop { - let new = sync::changed(ctx, &mut sub).await?.clone(); - let diff = new.get_newer(&old); - if diff.is_empty() { - continue; - } - *old = new; - return Ok(rpc::sync_validator_addrs::Resp(diff)); - } + _ctx: &ctx::Ctx, + req: rpc::push_validator_addrs::Req, + ) -> anyhow::Result<()> { + self.0.event(Event::ValidatorAddrsUpdated); + self.0 + .gossip + .validator_addrs + .update(&self.0.cfg.validators, &req.0[..]) + .await?; + Ok(()) } } #[derive(Debug, Clone, Copy)] -struct SyncBlocksServer<'a> { +struct PushBlockStoreStateServer<'a> { peer: &'a node::PublicKey, sender: &'a channel::UnboundedSender<io::OutputMessage>, } #[async_trait] -impl rpc::Handler<rpc::sync_blocks::PushSyncStateRpc> for SyncBlocksServer<'_> { - #[tracing::instrument( - level = "trace", - skip_all, - err, - fields(update = ?req.numbers()), - )] +impl rpc::Handler<rpc::push_block_store_state::Rpc> for PushBlockStoreStateServer<'_> { + fn max_req_size(&self) -> usize { + 10 * kB + } async fn handle( &self, ctx: &ctx::Ctx, - req: io::SyncState, - ) -> anyhow::Result<rpc::sync_blocks::SyncStateResponse> { + req: rpc::push_block_store_state::Req, + ) -> anyhow::Result<()> { let (response, response_receiver) = oneshot::channel(); let message = io::SyncBlocksRequest::UpdatePeerSyncState { peer: self.peer.clone(), - state: Box::new(req), + state: req.0, response, }; self.sender.send(message.into()); response_receiver.recv_or_disconnected(ctx).await??; - Ok(rpc::sync_blocks::SyncStateResponse) - } -} - -#[tracing::instrument(level = "trace", skip_all, err)] -async fn run_sync_blocks_client( - ctx: &ctx::Ctx, - client: &rpc::Client<rpc::sync_blocks::PushSyncStateRpc>, - mut sync_state_subscriber: watch::Receiver<io::SyncState>, -) -> anyhow::Result<()> { - // Send the initial value immediately. - let mut updated_state = sync_state_subscriber.borrow_and_update().clone(); - loop { - let span = tracing::trace_span!("push_sync_state", update = ?updated_state.numbers()); - let push_sync_state = async { - tracing::trace!("sending"); - client - .call(ctx, &updated_state) - .await - .context("sync_blocks_client")?; - tracing::trace!("sent"); - updated_state = sync::changed(ctx, &mut sync_state_subscriber) - .await - .context("sync_blocks_client update")?
- .clone(); - anyhow::Ok(()) - }; - push_sync_state.instrument(span).await?; + Ok(()) } } #[async_trait] -impl rpc::Handler<rpc::sync_blocks::GetBlockRpc> for SyncBlocksServer<'_> { +impl rpc::Handler<rpc::get_block::Rpc> for &BlockStore { + fn max_req_size(&self) -> usize { + kB + } async fn handle( &self, ctx: &ctx::Ctx, - req: rpc::sync_blocks::GetBlockRequest, - ) -> anyhow::Result<rpc::sync_blocks::GetBlockResponse> { - let (response, response_receiver) = oneshot::channel(); - let message = io::SyncBlocksRequest::GetBlock { - block_number: req.0, - response, - }; - self.sender.send(message.into()); - let response = response_receiver.recv_or_disconnected(ctx).await??; - Ok(response.into()) + req: rpc::get_block::Req, + ) -> anyhow::Result<rpc::get_block::Resp> { + Ok(rpc::get_block::Resp(self.block(ctx, req.0).await?)) } } @@ -146,103 +91,80 @@ async fn run_stream( state: &State, peer: &node::PublicKey, sender: &channel::UnboundedSender<io::OutputMessage>, - get_blocks_client: &rpc::Client<rpc::sync_blocks::GetBlockRpc>, stream: noise::Stream, ) -> anyhow::Result<()> { - let sync_validator_addrs_client = rpc::Client::<rpc::sync_validator_addrs::Rpc>::new(ctx); - let sync_blocks_server = SyncBlocksServer { peer, sender }; - let sync_state_client = rpc::Client::<rpc::sync_blocks::PushSyncStateRpc>::new(ctx); - - let enable_pings = state.gossip.cfg.enable_pings; - - scope::run!(ctx, |ctx, s| async { - s.spawn(async { - let mut service = rpc::Service::new() - .add_client(&sync_validator_addrs_client) - .add_server(ValidatorAddrsServer::new(state)) - .add_client(&sync_state_client) - .add_server::<rpc::sync_blocks::PushSyncStateRpc>(sync_blocks_server) - .add_client(get_blocks_client) - .add_server::<rpc::sync_blocks::GetBlockRpc>(sync_blocks_server) - .add_server(rpc::ping::Server); + let push_validator_addrs_client = rpc::Client::<rpc::push_validator_addrs::Rpc>::new(ctx); + let push_validator_addrs_server = PushValidatorAddrsServer(state); + let push_block_store_state_client = rpc::Client::<rpc::push_block_store_state::Rpc>::new(ctx); + let push_block_store_state_server = PushBlockStoreStateServer { peer, sender }; + + let get_block_client = Arc::new(rpc::Client::<rpc::get_block::Rpc>::new(ctx)); + state + .gossip + .get_block_clients + .insert(peer.clone(), get_block_client.clone()); + + let res = scope::run!(ctx, |ctx, s| async { + let mut service = rpc::Service::new() + .add_client(&push_validator_addrs_client) + .add_server(push_validator_addrs_server) + .add_client(&push_block_store_state_client) + .add_server(push_block_store_state_server) + .add_client(&get_block_client) + .add_server(&*state.gossip.block_store) + .add_server(rpc::ping::Server); + + if state.cfg.enable_pings { + let ping_client = rpc::Client::<rpc::ping::Rpc>::new(ctx); + service = service.add_client(&ping_client); + s.spawn(async { + let ping_client = ping_client; + ping_client.ping_loop(ctx, PING_TIMEOUT).await + }); + } - if enable_pings { - let ping_client = rpc::Client::<rpc::ping::Rpc>::new(ctx); - service = service.add_client(&ping_client); - let ctx = &*ctx; - s.spawn(async { - let ping_client = ping_client; - ping_client.ping_loop(ctx, PING_TIMEOUT).await - }); + // Push block store state updates to peer. + s.spawn::<()>(async { + let mut sub = state.gossip.block_store.subscribe(); + sub.mark_changed(); + loop { + let state = sync::changed(ctx, &mut sub).await?.clone(); + let req = rpc::push_block_store_state::Req(state); + push_block_store_state_client.call(ctx, &req, kB).await?; } - - service.run(ctx, stream).await?; - Ok(()) }); - if let Some(sync_state_subscriber) = state.gossip.sync_state.clone() { - s.spawn(run_sync_blocks_client( - ctx, - &sync_state_client, - sync_state_subscriber, - )); - }
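Both push loops in the new `run_stream` follow the same watch-subscription shape; a generic sketch of the pattern (assuming the `zksync_concurrency` watch API, i.e. `sub.mark_changed()` forcing an immediate first iteration and `sync::changed` blocking until the next update):

    // Sketch: push every new value of a watched state to the peer.
    // mark_changed() makes the first iteration fire immediately, so the
    // peer learns our current state right after the connection is set up.
    async fn push_on_change<T: Clone>(
        ctx: &ctx::Ctx,
        mut sub: sync::watch::Receiver<T>,
        push: impl Fn(T),
    ) -> ctx::OrCanceled<()> {
        sub.mark_changed();
        loop {
            let v = sync::changed(ctx, &mut sub).await?.clone();
            push(v);
        }
    }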
+ s.spawn::<()>(async { + // Push validator addrs updates to peer. + let mut old = ValidatorAddrs::default(); + let mut sub = state.gossip.validator_addrs.subscribe(); + sub.mark_changed(); + loop { + let new = sync::changed(ctx, &mut sub).await?.clone(); + let diff = new.get_newer(&old); + if diff.is_empty() { + continue; + } + old = new; + let req = rpc::push_validator_addrs::Req(diff); + push_validator_addrs_client.call(ctx, &req, kB).await?; + } + }); - loop { - let resp = sync_validator_addrs_client - .call(ctx, &rpc::sync_validator_addrs::Req) - .await?; - state - .gossip - .validator_addrs - .update(&state.cfg.validators, &resp.0[..]) - .await?; - state.event(Event::ValidatorAddrsUpdated); - } + service.run(ctx, stream).await?; + Ok(()) }) - .await -} - -async fn handle_clients_and_run_stream( - ctx: &ctx::Ctx, - state: &State, - peer: &node::PublicKey, - sender: &channel::UnboundedSender<io::OutputMessage>, - stream: noise::Stream, -) -> anyhow::Result<()> { - let clients = &state.gossip.get_block_clients; - let get_blocks_client = rpc::Client::<rpc::sync_blocks::GetBlockRpc>::new(ctx); - let get_blocks_client = Arc::new(get_blocks_client); - sync::lock(ctx, clients) - .await - .context("get_block_clients")? - .insert(peer.clone(), get_blocks_client.clone()); - - let res = run_stream(ctx, state, peer, sender, &get_blocks_client, stream).await; + .await; - // We remove the peer client unconditionally, even if the context is cancelled, so that the state - // is consistent. - let mut client_map = clients.lock().await; - // The get_blocks client might have been replaced because we might have both inbound and outbound - // connections to the same peer, and we utilize only one of them to send `get_block` requests. - // Thus, we need to check it before removing from the map. - let is_same_client = client_map - .get(peer) - .map_or(false, |client| Arc::ptr_eq(client, &get_blocks_client)); - if is_same_client { - client_map.remove(peer); - } + state + .gossip + .get_block_clients + .remove(peer.clone(), get_block_client); res } /// Handles an inbound stream. /// Closes the stream if there is another inbound stream opened from the same peer.
-#[tracing::instrument( - level = "trace", - skip_all, - err, - fields(my_key = ?state.gossip.cfg.key.public(), peer), -)] pub(crate) async fn run_inbound_stream( ctx: &ctx::Ctx, state: &State, @@ -251,24 +173,12 @@ pub(crate) async fn run_inbound_stream( ) -> anyhow::Result<()> { let peer = handshake::inbound(ctx, &state.gossip.cfg, &mut stream).await?; tracing::Span::current().record("peer", tracing::field::debug(&peer)); - state.gossip.inbound.insert(peer.clone()).await?; - state.event(Event::Gossip(StreamEvent::InboundOpened(peer.clone()))); - - let res = handle_clients_and_run_stream(ctx, state, &peer, sender, stream).await; - + let res = run_stream(ctx, state, &peer, sender, stream).await; state.gossip.inbound.remove(&peer).await; - state.event(Event::Gossip(StreamEvent::InboundClosed(peer))); - res } -#[tracing::instrument( - level = "trace", - skip(ctx, state, sender), - err, - fields(my_key = ?state.gossip.cfg.key.public()) -)] async fn run_outbound_stream( ctx: &ctx::Ctx, state: &State, @@ -280,13 +190,8 @@ async fn run_outbound_stream( handshake::outbound(ctx, &state.gossip.cfg, &mut stream, peer).await?; state.gossip.outbound.insert(peer.clone()).await?; - state.event(Event::Gossip(StreamEvent::OutboundOpened(peer.clone()))); - - let res = handle_clients_and_run_stream(ctx, state, peer, sender, stream).await; - + let res = run_stream(ctx, state, peer, sender, stream).await; state.gossip.outbound.remove(peer).await; - state.event(Event::Gossip(StreamEvent::OutboundClosed(peer.clone()))); - res } @@ -328,33 +233,6 @@ async fn run_address_announcer( } } -#[tracing::instrument(level = "trace", skip_all, err, fields(recipient, number))] -async fn handle_sync_blocks_message( - ctx: &ctx::Ctx, - state: &State, - message: io::SyncBlocksInputMessage, -) -> anyhow::Result<()> { - let io::SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - } = message; - - tracing::Span::current() - .record("recipient", tracing::field::debug(&recipient)) - .record("number", number.0); - - let clients = &state.gossip.get_block_clients; - let recipient_client = sync::lock(ctx, clients).await?.get(&recipient).cloned(); - let recipient_client = recipient_client.context("recipient is unreachable")?; - let request = rpc::sync_blocks::GetBlockRequest(number); - let peer_response = recipient_client.call(ctx, &request).await?.0; - - response - .send(peer_response) - .map_err(|_| anyhow::anyhow!("cannot send response to request to request initiator")) -} - /// Runs an RPC client trying to maintain 1 outbound connection per validator. pub(crate) async fn run_client( ctx: &ctx::Ctx, @@ -379,9 +257,23 @@ pub(crate) async fn run_client( s.spawn(async { while let Ok(message) = receiver.recv(ctx).await { s.spawn(async { - handle_sync_blocks_message(ctx, state, message).await.ok(); - // ^ The client errors are logged by the method instrumentation wrapper, - // so we don't need logging here. 
+ let message = message; + let io::SyncBlocksInputMessage::GetBlock { + recipient, + number, + response, + } = message; + let _ = response.send( + match state + .gossip + .get_block(ctx, &recipient, number, state.cfg.max_block_size) + .await + { + Ok(Some(block)) => Ok(block), + Ok(None) => Err(io::GetBlockError::NotAvailable), + Err(err) => Err(io::GetBlockError::Internal(err)), + }, + ); Ok(()) }); } diff --git a/node/actors/network/src/gossip/state.rs index c40f3cd6..3b33631f 100644 --- a/node/actors/network/src/gossip/state.rs +++ b/node/actors/network/src/gossip/state.rs @@ -1,10 +1,13 @@ -use crate::{io::SyncState, pool::PoolWatch, rpc, watch::Watch}; +use crate::{pool::PoolWatch, rpc, watch::Watch}; +use anyhow::Context as _; use std::{ collections::{HashMap, HashSet}, - sync::Arc, + sync::{Arc, Mutex}, }; -use zksync_concurrency::sync::{self, watch, Mutex}; +use zksync_concurrency::{ctx, sync}; use zksync_consensus_roles::{node, validator}; +use zksync_consensus_storage::BlockStore; +use zksync_protobuf::kB; /// Mapping from validator::PublicKey to a signed validator::NetAddress. /// Represents the current state of node's knowledge about the validator endpoints. @@ -118,17 +121,49 @@ pub struct Config { pub key: node::SecretKey, /// Limit on the number of inbound connections outside /// of the `static_inbound` set. - pub dynamic_inbound_limit: u64, + pub dynamic_inbound_limit: usize, /// Inbound connections that should be unconditionally accepted. pub static_inbound: HashSet<node::PublicKey>, /// Outbound connections that the node should actively try to /// establish and maintain. pub static_outbound: HashMap<node::PublicKey, std::net::SocketAddr>, - /// Enables pings sent over the gossip network. - pub enable_pings: bool, } -type ClientMap = Mutex<HashMap<node::PublicKey, Arc<rpc::Client<rpc::sync_blocks::GetBlockRpc>>>>; +/// Multimap of pointers indexed by `node::PublicKey`. +/// Used to maintain a collection of GetBlock rpc clients. +/// TODO(gprusak): consider upgrading PoolWatch instead. +pub(crate) struct ArcMap<T>(Mutex<HashMap<node::PublicKey, Vec<Arc<T>>>>); + +impl<T> Default for ArcMap<T> { + fn default() -> Self { + Self(Mutex::default()) + } +} + +impl<T> ArcMap<T> { + /// Fetches any pointer for the given key. + pub(crate) fn get_any(&self, key: &node::PublicKey) -> Option<Arc<T>> { + self.0.lock().unwrap().get(key)?.first().cloned() + } + + /// Insert a pointer. + pub(crate) fn insert(&self, key: node::PublicKey, p: Arc<T>) { + self.0.lock().unwrap().entry(key).or_default().push(p); + } + + /// Removes a pointer. + pub(crate) fn remove(&self, key: node::PublicKey, p: Arc<T>) { + let mut this = self.0.lock().unwrap(); + use std::collections::hash_map::Entry; + let Entry::Occupied(mut e) = this.entry(key) else { + return; + }; + e.get_mut().retain(|c| !Arc::ptr_eq(&p, c)); + if e.get_mut().is_empty() { + e.remove(); + } + } +} /// Gossip network state. pub(crate) struct State { @@ -140,25 +175,42 @@ pub(crate) outbound: PoolWatch<node::PublicKey>, /// Current state of knowledge about validators' endpoints. pub(crate) validator_addrs: ValidatorAddrsWatch, - /// Subscriber for `SyncState` updates. - pub(crate) sync_state: Option<watch::Receiver<io::SyncState>>, + /// Block store to serve `get_block` requests from. + pub(crate) block_store: Arc<BlockStore>, /// Clients for `get_block` requests for each currently active peer. - pub(crate) get_block_clients: ClientMap, + pub(crate) get_block_clients: ArcMap<rpc::Client<rpc::get_block::Rpc>>, }
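A usage sketch of the `ArcMap` contract above (hypothetical test, not part of the patch; it assumes the testonly rand support for generating a `node::SecretKey`). Because a node can hold both an inbound and an outbound connection to the same peer, each connection registers its own client, and `remove` drops only the exact pointer it is handed:

    use rand::Rng as _;
    use std::sync::Arc;

    let rng = &mut rand::thread_rng();
    let key = rng.gen::<node::SecretKey>().public();
    let map = ArcMap::<u32>::default();
    let (a, b) = (Arc::new(1), Arc::new(2));
    map.insert(key.clone(), a.clone());
    map.insert(key.clone(), b);
    // Removal is by pointer equality, not value equality, so dropping
    // one connection's client never unregisters the other one's.
    map.remove(key.clone(), a);
    assert_eq!(map.get_any(&key).as_deref(), Some(&2));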
impl State { /// Constructs a new State. - pub(crate) fn new(cfg: Config, sync_state: Option<watch::Receiver<io::SyncState>>) -> Self { + pub(crate) fn new(cfg: Config, block_store: Arc<BlockStore>) -> Self { Self { - inbound: PoolWatch::new( - cfg.static_inbound.clone(), - cfg.dynamic_inbound_limit as usize, - ), + inbound: PoolWatch::new(cfg.static_inbound.clone(), cfg.dynamic_inbound_limit), outbound: PoolWatch::new(cfg.static_outbound.keys().cloned().collect(), 0), validator_addrs: ValidatorAddrsWatch::default(), - sync_state, - get_block_clients: ClientMap::default(), + block_store, + get_block_clients: ArcMap::default(), cfg, } } + + pub(super) async fn get_block( + &self, + ctx: &ctx::Ctx, + recipient: &node::PublicKey, + number: validator::BlockNumber, + max_block_size: usize, + ) -> anyhow::Result<Option<validator::FinalBlock>> { + Ok(self + .get_block_clients + .get_any(recipient) + .context("recipient is unreachable")? + .call( + ctx, + &rpc::get_block::Req(number), + max_block_size.saturating_add(kB), + ) + .await? + .0) + } } diff --git a/node/actors/network/src/gossip/tests.rs index 8091b352..3d4ab322 100644 --- a/node/actors/network/src/gossip/tests.rs +++ b/node/actors/network/src/gossip/tests.rs @@ -1,5 +1,5 @@ use super::*; -use crate::{event::Event, io, preface, rpc, rpc::Rpc as _, run_network, testonly}; +use crate::{event::Event, io, preface, rpc, rpc::Rpc as _, testonly}; use anyhow::Context as _; use pretty_assertions::assert_eq; use rand::Rng; @@ -10,15 +10,12 @@ use std::{ }; use test_casing::{test_casing, Product}; use tracing::Instrument as _; use zksync_concurrency::{ - ctx::{self, channel}, - oneshot, scope, - sync::{watch, Mutex}, - testonly::abort_on_panic, + ctx, oneshot, scope, sync, + testonly::{abort_on_panic, set_timeout}, time, }; -use zksync_consensus_roles as roles; use zksync_consensus_roles::validator::{self, BlockNumber, FinalBlock}; -use zksync_consensus_utils::pipe; +use zksync_consensus_storage::testonly::new_store; #[tokio::test] async fn test_one_connection_per_node() { abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut nodes: Vec<_> = testonly::Instance::new(rng, 5, 2); + let setup = validator::testonly::GenesisSetup::new(rng, 5); + let cfgs = testonly::new_configs(rng, &setup, 2); scope::run!(ctx, |ctx, s| async { - for node in &nodes { - let (network_pipe, _) = pipe::new(); - - s.spawn_bg(run_network( - ctx, - node.state.clone(), - network_pipe - )); - } + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let mut nodes: Vec<_> = cfgs.iter().enumerate().map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }).collect(); tracing::info!("waiting for all connections to be established"); for node in &mut nodes { - tracing::info!("node {:?} awaiting connections", node.consensus_config().key.public()); - let want = &node.state.gossip.cfg.static_outbound.keys().cloned().collect(); - let mut subscription = node.state.gossip.outbound.subscribe(); - subscription - .wait_for(|got| got.current() == want) - .await - .unwrap(); + node.wait_for_gossip_connections().await; } tracing::info!( "Impersonate a node, and try to establish an additional connection to an already connected peer."
); - let my_gossip_config = &nodes[0].state.gossip.cfg; - let (peer, addr) = my_gossip_config.static_outbound.iter().next().unwrap(); + let (peer, addr) = cfgs[0].gossip.static_outbound.iter().next().unwrap(); let mut stream = preface::connect( ctx, *addr, @@ -63,7 +52,7 @@ async fn test_one_connection_per_node() { .await .context("preface::connect")?; - handshake::outbound(ctx, my_gossip_config, &mut stream, peer) + handshake::outbound(ctx, &cfgs[0].gossip, &mut stream, peer) .await .context("handshake::outbound")?; tracing::info!("The connection is expected to be closed automatically by peer."); @@ -236,31 +225,32 @@ async fn test_validator_addrs_propagation() { abort_on_panic(); let ctx = &ctx::test_root(&ctx::AffineClock::new(40.)); let rng = &mut ctx.rng(); - let nodes: Vec<_> = testonly::Instance::new(rng, 10, 1); + let setup = validator::testonly::GenesisSetup::new(rng, 10); + let cfgs = testonly::new_configs(rng, &setup, 1); scope::run!(ctx, |ctx, s| async { - for n in &nodes { - let (network_pipe, _) = pipe::new(); - s.spawn_bg(run_network(ctx, n.state.clone(), network_pipe)); - } - let want: HashMap<_, _> = nodes + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let nodes: Vec<_> = cfgs + .iter() + .enumerate() + .map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }) + .collect(); + let want: HashMap<_, _> = cfgs .iter() - .map(|node| { - ( - node.consensus_config().key.public(), - node.consensus_config().public_addr, - ) + .map(|cfg| { + let cfg = cfg.consensus.as_ref().unwrap(); + (cfg.key.public(), cfg.public_addr) }) .collect(); for (i, node) in nodes.iter().enumerate() { tracing::info!("awaiting for node[{i}] to learn validator_addrs"); - node.state - .gossip - .validator_addrs - .subscribe() - .wait_for(|got| want == to_addr_map(got)) - .await - .unwrap(); + let sub = &mut node.state.gossip.validator_addrs.subscribe(); + sync::wait_for(ctx, sub, |got| want == to_addr_map(got)).await?; } Ok(()) }) @@ -268,7 +258,7 @@ async fn test_validator_addrs_propagation() { .unwrap(); } -const EXCHANGED_STATE_COUNT: u64 = 5; +const EXCHANGED_STATE_COUNT: usize = 5; const NETWORK_CONNECTIVITY_CASES: [(usize, usize); 5] = [(2, 1), (3, 2), (5, 3), (10, 4), (10, 7)]; /// Tests block syncing with global network synchronization (a next block becoming available @@ -278,18 +268,34 @@ const NETWORK_CONNECTIVITY_CASES: [(usize, usize); 5] = [(2, 1), (3, 2), (5, 3), #[tracing::instrument(level = "trace")] async fn syncing_blocks(node_count: usize, gossip_peers: usize) { abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(5)); let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); - let ctx = &ctx.with_timeout(time::Duration::seconds(200)); let rng = &mut ctx.rng(); - let mut nodes = testonly::Instance::new(rng, node_count, gossip_peers); - let network_state = NetworkState::new(rng, &mut nodes); - + let mut setup = validator::testonly::GenesisSetup::new(rng, node_count); + let cfgs = testonly::new_configs(rng, &setup, gossip_peers); scope::run!(ctx, |ctx, s| async { - for node in nodes { - let (network_pipe, dispatcher_pipe) = pipe::new(); - s.spawn_bg(run_network(ctx, node.state.clone(), network_pipe)); - s.spawn(network_state.run_mock_dispatcher(ctx, dispatcher_pipe, gossip_peers)); + let mut nodes = vec![]; + for (i, cfg) in cfgs.into_iter().enumerate() { + let (store, runner) = new_store(ctx, 
&setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let (node, runner) = testonly::Instance::new(cfg, store); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + nodes.push(node); + } + setup.push_blocks(rng, EXCHANGED_STATE_COUNT); + for block in &setup.blocks { + for node in &nodes { + node.state + .gossip + .block_store + .queue_block(ctx, block.clone()) + .await + .context("queue_block()")?; + } + for node in &mut nodes { + wait_for_updates(ctx, node, gossip_peers, block).await?; + } } Ok(()) }) @@ -297,92 +303,28 @@ async fn syncing_blocks(node_count: usize, gossip_peers: usize) { .unwrap(); } -#[derive(Debug)] -struct NetworkStateInner { - node_count: usize, - updated_node_count: usize, - state_sender: watch::Sender, - future_states: Vec, -} - -/// Network state shared among all nodes in order to coordinate new block generation. -#[derive(Debug)] -struct NetworkState(Mutex); - -impl NetworkState { - fn new(rng: &mut impl Rng, nodes: &mut [testonly::Instance]) -> Self { - let node_count = nodes.len(); - let first_state = io::SyncState::gen(rng, BlockNumber(0)); - let future_states = (1..EXCHANGED_STATE_COUNT) - .map(|i| io::SyncState::gen(rng, BlockNumber(i))) - .rev() // We `pop()` states, so their ordering should be reversed - .collect(); - let (state_sender, state_subscriber) = watch::channel(first_state); - for node in nodes { - node.disable_gossip_pings(); - node.set_sync_state_subscriber(state_subscriber.clone()); - } - - let inner = NetworkStateInner { - node_count, - updated_node_count: 0, - state_sender, - future_states, +async fn wait_for_updates( + ctx: &ctx::Ctx, + node: &mut testonly::Instance, + peer_count: usize, + block: &FinalBlock, +) -> anyhow::Result<()> { + let mut updates = HashSet::new(); + while updates.len() < peer_count { + let io::OutputMessage::SyncBlocks(io::SyncBlocksRequest::UpdatePeerSyncState { + peer, + state, + response, + }) = node.pipe.recv(ctx).await.context("pipe.recv()")? + else { + continue; }; - Self(Mutex::new(inner)) - } - - async fn update_node_state(&self, received_state: &io::SyncState) { - let mut inner = self.0.lock().await; - assert_eq!(*received_state, *inner.state_sender.borrow()); - - inner.updated_node_count += 1; - if inner.updated_node_count == inner.node_count { - inner.updated_node_count = 0; - if let Some(next_state) = inner.future_states.pop() { - inner.state_sender.send_replace(next_state); - } - } - } - - async fn run_mock_dispatcher( - &self, - ctx: &ctx::Ctx, - mut pipe: pipe::DispatcherPipe, - peer_count: usize, - ) -> anyhow::Result<()> { - let mut received_states_by_peer = HashMap::new(); - let mut expected_latest_block_number = BlockNumber(0); - loop { - if let io::OutputMessage::SyncBlocks(req) = pipe.recv(ctx).await? { - match req { - io::SyncBlocksRequest::UpdatePeerSyncState { - peer, - state, - response, - } => { - let last_block_number = state.last_stored_block.message.proposal.number; - if last_block_number == expected_latest_block_number { - // We might receive outdated states, hence this check - received_states_by_peer.insert(peer.clone(), *state.clone()); - } - - if received_states_by_peer.len() == peer_count { - received_states_by_peer.clear(); - expected_latest_block_number = expected_latest_block_number.next(); - if expected_latest_block_number == BlockNumber(EXCHANGED_STATE_COUNT) { - // The node has received all generated blocks. 
- return Ok(()); - } - self.update_node_state(&state).await; - } - response.send(()).ok(); - } - _ => unreachable!(), - } - } + if state.last == block.justification { + updates.insert(peer); } + response.send(()).ok(); } + Ok(()) } /// Tests block syncing in an uncoordinated network, in which new blocks arrive at a schedule. @@ -399,44 +341,34 @@ async fn uncoordinated_block_syncing( state_generation_interval: time::Duration, ) { abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(5)); let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); - let ctx = &ctx.with_timeout(time::Duration::seconds(200)); let rng = &mut ctx.rng(); - let mut nodes = testonly::Instance::new(rng, node_count, gossip_peers); - - let mut states: Vec<_> = (0..EXCHANGED_STATE_COUNT) - .map(|i| io::SyncState::gen(rng, BlockNumber(i))) - .collect(); - let first_state = states.remove(0); - let (state_sender, state_subscriber) = watch::channel(first_state); - - for node in &mut nodes { - node.disable_gossip_pings(); - node.set_sync_state_subscriber(state_subscriber.clone()); - } - + let mut setup = validator::testonly::GenesisSetup::empty(rng, node_count); + setup.push_blocks(rng, EXCHANGED_STATE_COUNT); scope::run!(ctx, |ctx, s| async { - s.spawn(async { - for state in states { - ctx.sleep(state_generation_interval).await?; - let last_block_number = state.last_stored_block.message.proposal.number; - tracing::debug!("Generated `SyncState` with last block number {last_block_number}"); - state_sender.send_replace(state); - } - Ok(()) - }); - - for node in &nodes { - let (network_pipe, dispatcher_pipe) = pipe::new(); - s.spawn_bg(run_network(ctx, node.state.clone(), network_pipe)); - let node_key = node.state.gossip.cfg.key.public(); - s.spawn(run_mock_uncoordinated_dispatcher( - ctx, - dispatcher_pipe, - node_key, - gossip_peers, - )); + for (i, cfg) in testonly::new_configs(rng, &setup, gossip_peers) + .into_iter() + .enumerate() + { + let i = i; + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let (node, runner) = testonly::Instance::new(cfg, store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + s.spawn(async { + let store = store; + for block in &setup.blocks[1..] { + ctx.sleep(state_generation_interval).await?; + store.queue_block(ctx, block.clone()).await.unwrap(); + } + Ok(()) + }); + s.spawn(async { + let mut node = node; + wait_for_updates(ctx, &mut node, gossip_peers, setup.blocks.last().unwrap()).await + }); } Ok(()) }) @@ -444,126 +376,78 @@ async fn uncoordinated_block_syncing( .unwrap(); } -async fn run_mock_uncoordinated_dispatcher( - ctx: &ctx::Ctx, - mut pipe: pipe::DispatcherPipe, - node_key: roles::node::PublicKey, - peer_count: usize, -) -> anyhow::Result<()> { - let mut peers_with_final_state = HashSet::new(); - loop { - if let io::OutputMessage::SyncBlocks(req) = pipe.recv(ctx).await? 
{ - match req { - io::SyncBlocksRequest::UpdatePeerSyncState { - peer, - state, - response, - } => { - let last_block_number = state.last_stored_block.message.proposal.number; - tracing::debug!( - "Node {node_key:?} received update with block number {last_block_number} from {peer:?}" - ); - assert!(last_block_number < BlockNumber(EXCHANGED_STATE_COUNT)); - if last_block_number == BlockNumber(EXCHANGED_STATE_COUNT - 1) { - peers_with_final_state.insert(peer.clone()); - if peers_with_final_state.len() == peer_count { - tracing::debug!("Node {node_key:?} received latest state from peers {peers_with_final_state:?}"); - return Ok(()); - } - } - response.send(()).ok(); - } - _ => unreachable!(), - } - } - } -} - #[test_casing(5, NETWORK_CONNECTIVITY_CASES)] #[tokio::test] async fn getting_blocks_from_peers(node_count: usize, gossip_peers: usize) { abort_on_panic(); - let ctx = &ctx::test_root(&ctx::ManualClock::new()); + let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut nodes = testonly::Instance::new(rng, node_count, gossip_peers); - for node in &mut nodes { - node.disable_gossip_pings(); - } - let node_keys: Vec<_> = nodes - .iter() - .map(|node| node.state.gossip.cfg.key.public()) - .collect(); + let setup = validator::testonly::GenesisSetup::new(rng, node_count); + let cfgs = testonly::new_configs(rng, &setup, gossip_peers); - let block: FinalBlock = rng.gen(); // All inbound and outbound peers should answer the request. let expected_successful_responses = (2 * gossip_peers).min(node_count - 1); + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; scope::run!(ctx, |ctx, s| async { - let node_handles = nodes.iter().map(|node| { - let (network_pipe, dispatcher_pipe) = pipe::new(); - let (node_stop_sender, node_stop_receiver) = oneshot::channel::<()>(); - s.spawn_bg(async { - scope::run!(ctx, |ctx, s| async { - s.spawn_bg(run_network(ctx, node.state.clone(), network_pipe)); - s.spawn_bg(async { - run_get_block_dispatcher(ctx, dispatcher_pipe.recv, block.clone()).await; - Ok(()) - }); - node_stop_receiver.recv_or_disconnected(ctx).await.ok(); - Ok(()) - }) - .await - }); - (node, node_stop_sender, dispatcher_pipe.send) - }); - let mut node_handles: Vec<_> = node_handles.collect(); + s.spawn_bg(runner.run(ctx)); + let mut nodes: Vec<_> = cfgs + .into_iter() + .enumerate() + .map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg, store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }) + .collect(); - for (node, _, get_block_sender) in &node_handles { + for node in &nodes { node.wait_for_gossip_connections().await; - + tracing::info!("establish connections"); let mut successful_peer_responses = 0; - for peer_key in &node_keys { + for peer in &nodes { let (response, response_receiver) = oneshot::channel(); - let message = io::SyncBlocksInputMessage::GetBlock { - recipient: peer_key.clone(), - number: BlockNumber(1), - response, - }; - get_block_sender.send(message.into()); - if response_receiver.recv_or_disconnected(ctx).await?.is_ok() { + node.pipe.send( + io::SyncBlocksInputMessage::GetBlock { + recipient: peer.state.gossip.cfg.key.public(), + number: setup.blocks[0].header().number, + response, + } + .into(), + ); + tracing::info!("wait for response"); + if let Ok(block) = response_receiver.recv(ctx).await? 
{ + assert_eq!(block, setup.blocks[0]); successful_peer_responses += 1; - } else { - let self_key = node.state.gossip.cfg.key.public(); - tracing::trace!("Request from {self_key:?} to {peer_key:?} was dropped"); } } assert_eq!(successful_peer_responses, expected_successful_responses); } - // Stop the last node by dropping its `node_stop_sender` and wait until it disconnects - // from all peers. - node_handles.pop().unwrap(); - let stopped_node_key = node_keys.last().unwrap(); - for (node, _, get_block_sender) in &node_handles { - let mut inbound_conns = node.state.gossip.inbound.subscribe(); - inbound_conns - .wait_for(|conns| !conns.current().contains(stopped_node_key)) - .await?; - let mut outbound_conns = node.state.gossip.outbound.subscribe(); - outbound_conns - .wait_for(|conns| !conns.current().contains(stopped_node_key)) - .await?; + tracing::info!("stop the last node"); + let last = nodes.pop().unwrap(); + last.terminate(ctx).await?; + + let stopped_node_key = last.state.gossip.cfg.key.public(); + for node in &nodes { + tracing::info!("wait for disconnection"); + node.wait_for_gossip_disconnect(ctx, &stopped_node_key) + .await + .unwrap(); + tracing::info!("verify that the stopped node is unreachable"); // Check that the node cannot access the stopped peer. let (response, response_receiver) = oneshot::channel(); - let message = io::SyncBlocksInputMessage::GetBlock { - recipient: stopped_node_key.clone(), - number: BlockNumber(1), - response, - }; - get_block_sender.send(message.into()); - assert!(response_receiver.recv_or_disconnected(ctx).await?.is_err()); + node.pipe.send( + io::SyncBlocksInputMessage::GetBlock { + recipient: stopped_node_key.clone(), + number: BlockNumber(1), + response, + } + .into(), + ); + assert!(response_receiver.recv(ctx).await?.is_err()); } Ok(()) }) .await .unwrap(); } -async fn run_get_block_dispatcher( - ctx: &ctx::Ctx, - mut receiver: channel::UnboundedReceiver<io::OutputMessage>, - block: FinalBlock, -) { - while let Ok(message) = receiver.recv(ctx).await { - match message { - io::OutputMessage::SyncBlocks(io::SyncBlocksRequest::GetBlock { - block_number, - response, - }) => { - assert_eq!(block_number, BlockNumber(1)); - response.send(Ok(block.clone())).ok(); - } - other => panic!("received unexpected message: {other:?}"), - } - } -} -
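The expected responder count in the test above comes from counting both connection directions: with the topologies used here (an assumption about `new_configs`), each node dials `gossip_peers` outbound peers and accepts roughly as many inbound ones, and both kinds can serve `get_block`, capped by the `node_count - 1` distinct peers. Worked through for each test case:

    // Worked example of expected_successful_responses per NETWORK_CONNECTIVITY_CASES entry:
    for (node_count, gossip_peers) in [(2, 1), (3, 2), (5, 3), (10, 4), (10, 7)] {
        let expected = (2 * gossip_peers).min(node_count - 1);
        println!("{node_count} nodes / {gossip_peers} peers -> {expected} responders");
    }
    // Prints 1, 2, 4, 8 and 9 responders respectively.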
/// When a validator node is restarted, it should immediately override /// the AccountData that is present in the network from the previous run. #[tokio::test] async fn validator_node_restart() { abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(5)); + let clock = &ctx::ManualClock::new(); let ctx = &ctx::test_root(clock); let rng = &mut ctx.rng(); @@ -603,13 +470,26 @@ let zero = time::Duration::ZERO; let sec = time::Duration::seconds(1); + let setup = validator::testonly::GenesisSetup::new(rng, 2); + let mut cfgs = testonly::new_configs(rng, &setup, 1); + let (store, store_runner) = new_store(ctx, &setup.blocks[0]).await; + let (mut node1, node1_runner) = testonly::Instance::new(cfgs[1].clone(), store.clone()); scope::run!(ctx, |ctx, s| async { - let mut cfgs = testonly::Instance::new_configs(rng, 2, 1); - let mut node1 = testonly::Instance::from_cfg(cfgs[1].clone()); - let (pipe, _) = pipe::new(); + s.spawn_bg(store_runner.run(ctx)); s.spawn_bg( - run_network(ctx, node1.state.clone(), pipe).instrument(tracing::info_span!("node1")), + node1_runner + .run(ctx) + .instrument(tracing::info_span!("node1")), ); + s.spawn_bg(async { + // Progress time whenever node1 receives an update. + // TODO(gprusak): alternatively we could entirely disable time progress + // by setting refresh time to 0 in tests. + while let Ok(Event::ValidatorAddrsUpdated) = node1.events.recv(ctx).await { + clock.advance(rpc::push_validator_addrs::Rpc::RATE.refresh); + } + Ok(()) + }); // We restart the node0 after shifting the UTC clock back and forth. // node0 is expected to learn what is the currently broadcasted @@ -617,9 +497,8 @@ let mut utc_times = HashSet::new(); let start = ctx.now_utc(); for clock_shift in [zero, sec, -2 * sec, 4 * sec, 10 * sec, -30 * sec] { // Set the new addr to broadcast. let mutated_config = cfgs[0].consensus.as_mut().unwrap(); - let key0 = mutated_config.key.public(); let addr0 = mk_addr(rng); mutated_config.public_addr = addr0; // Shift the UTC clock. @@ -632,37 +512,20 @@ let now = ctx.now_utc(); tracing::info!("now = {now:?}"); scope::run!(ctx, |ctx, s| async { - let node0 = testonly::Instance::from_cfg(cfgs[0].clone()); - let (pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, node0.state.clone(), pipe) - .instrument(tracing::info_span!("node0")), - ); - s.spawn_bg(async { - // Progress time whenever node1 receives an update. - // TODO(gprusak): alternatively we could entirely disable time progress - // by setting refresh time to 0 in tests. - while let Ok(ev) = node1.events.recv(ctx).await { - if let Event::ValidatorAddrsUpdated = ev { - clock.advance(rpc::sync_validator_addrs::Rpc::RATE.refresh); - } - } - Ok(()) - }); - node1 - .state - .gossip - .validator_addrs - .subscribe() - .wait_for(|got| { - let Some(got) = got.get(&key0) else { - return false; - }; - tracing::info!("got.addr = {}", got.msg.addr); - got.msg.addr == addr0 - }) - .await - .unwrap(); + // _node0 contains the pipe, which has to exist to prevent the connection from dying // early.
+ let (_node0, runner) = testonly::Instance::new(cfgs[0].clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node0"))); + tracing::info!("wait for the update to arrive to node1"); + let sub = &mut node1.state.gossip.validator_addrs.subscribe(); + sync::wait_for(ctx, sub, |got| { + let Some(got) = got.get(&setup.keys[0].public()) else { + return false; + }; + tracing::info!("got.addr = {}", got.msg.addr); + got.msg.addr == addr0 + }) + .await?; Ok(()) }) .await?; @@ -686,7 +549,8 @@ async fn rate_limiting() { // construct star topology. let n = 10; - let mut cfgs = testonly::Instance::new_configs(rng, n, 0); + let setup = validator::testonly::GenesisSetup::new(rng, n); + let mut cfgs = testonly::new_configs(rng, &setup, 0); let want: HashMap<_, _> = cfgs .iter() .map(|cfg| { @@ -699,50 +563,35 @@ async fn rate_limiting() { let public_addr = cfgs[i].consensus.as_ref().unwrap().public_addr; cfgs[0].gossip.static_outbound.insert(key, public_addr); } - let mut nodes: Vec<_> = cfgs.into_iter().map(testonly::Instance::from_cfg).collect(); - + let mut nodes = vec![]; scope::run!(ctx, |ctx, s| async { + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); // Spawn the satellite nodes and wait until they register // their own address. - for (i, node) in nodes.iter().enumerate().skip(1) { - let (pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, node.state.clone(), pipe) - .instrument(tracing::info_span!("node", i)), - ); - node.state - .gossip - .validator_addrs - .subscribe() - .wait_for(|got| got.get(&node.consensus_config().key.public()).is_some()) - .await - .unwrap(); + for (i, cfg) in cfgs[1..].iter().enumerate() { + let (node, runner) = testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + let sub = &mut node.state.gossip.validator_addrs.subscribe(); + sync::wait_for(ctx, sub, |got| { + got.get(&node.consensus_config().key.public()).is_some() + }) + .await + .unwrap(); + nodes.push(node); } + // Spawn the center node. - let (pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, nodes[0].state.clone(), pipe) - .instrument(tracing::info_span!("node[0]")), - ); + let (center, runner) = testonly::Instance::new(cfgs[0].clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node[0]"))); // Await for the center to receive all validator addrs. - nodes[0] - .state - .gossip - .validator_addrs - .subscribe() - .wait_for(|got| want == to_addr_map(got)) - .await - .unwrap(); + let sub = &mut center.state.gossip.validator_addrs.subscribe(); + sync::wait_for(ctx, sub, |got| want == to_addr_map(got)).await?; // Advance time and wait for all other nodes to receive validator addrs. - clock.advance(rpc::sync_validator_addrs::Rpc::RATE.refresh); - for node in &nodes[1..n] { - node.state - .gossip - .validator_addrs - .subscribe() - .wait_for(|got| want == to_addr_map(got)) - .await - .unwrap(); + clock.advance(rpc::push_validator_addrs::Rpc::RATE.refresh); + for node in &nodes { + let sub = &mut node.state.gossip.validator_addrs.subscribe(); + sync::wait_for(ctx, sub, |got| want == to_addr_map(got)).await?; } Ok(()) }) @@ -750,12 +599,10 @@ async fn rate_limiting() { .unwrap(); // Check that the satellite nodes received either 1 or 2 updates. - for n in &mut nodes[1..] 
{ + for n in &mut nodes { let mut count = 0; - while let Some(ev) = n.events.try_recv() { - if let Event::ValidatorAddrsUpdated = ev { - count += 1; - } + while let Some(Event::ValidatorAddrsUpdated) = n.events.try_recv() { + count += 1; } assert!((1..=2).contains(&count)); } diff --git a/node/actors/network/src/io.rs b/node/actors/network/src/io.rs index 336d665c..9747c8ed 100644 --- a/node/actors/network/src/io.rs +++ b/node/actors/network/src/io.rs @@ -1,6 +1,7 @@ #![allow(missing_docs)] use zksync_concurrency::oneshot; use zksync_consensus_roles::{node, validator}; +use zksync_consensus_storage::BlockStoreState; /// All the messages that other actors can send to the Network actor. #[derive(Debug)] @@ -31,8 +32,7 @@ pub enum SyncBlocksInputMessage { GetBlock { recipient: node::PublicKey, number: validator::BlockNumber, - /// If the peer is unavailable, `response` will be dropped. - response: oneshot::Sender, + response: oneshot::Sender>, }, } @@ -53,45 +53,18 @@ pub struct ConsensusReq { pub ack: oneshot::Sender<()>, } -/// Current block sync state of a node sent in response to [`GetSyncStateRequest`]. -#[derive(Debug, Clone, PartialEq)] -pub struct SyncState { - pub first_stored_block: validator::CommitQC, - pub last_stored_block: validator::CommitQC, -} - -/// Projection of [`SyncState`] comprised of block numbers. -#[derive(Debug, Clone, Copy)] -pub struct SyncStateNumbers { - pub first_stored_block: validator::BlockNumber, - pub last_stored_block: validator::BlockNumber, -} - -impl SyncState { - /// Returns numbers for block QCs contained in this state. - pub fn numbers(&self) -> SyncStateNumbers { - SyncStateNumbers { - first_stored_block: self.first_stored_block.message.proposal.number, - last_stored_block: self.last_stored_block.message.proposal.number, - } - } -} - -/// Errors returned from a [`GetBlockResponse`]. +/// Error returned in response to [`GetBlock`] call. /// /// Note that these errors don't include network-level errors, only app-level ones. #[derive(Debug, thiserror::Error)] pub enum GetBlockError { - /// Transient error: the node doesn't have the requested L2 block and plans to get it in the future - /// by syncing. - #[error( - "node doesn't have the requested L2 block and plans to get it in the future by syncing" - )] - NotSynced, + /// Transient error: the node doesn't have the requested L2 block. + #[error("node doesn't have the requested L2 block")] + NotAvailable, + #[error(transparent)] + Internal(#[from] anyhow::Error), } -pub type GetBlockResponse = Result; - #[derive(Debug)] pub enum SyncBlocksRequest { /// Notifies about an update in peer's `SyncState`. @@ -99,18 +72,11 @@ pub enum SyncBlocksRequest { /// Peer that has reported the update. peer: node::PublicKey, /// Updated peer syncing state. - state: Box, + state: BlockStoreState, /// Acknowledgement response returned by the block syncing actor. // TODO: return an error in case of invalid `SyncState`? response: oneshot::Sender<()>, }, - /// Requests an L2 block with the specified number. - GetBlock { - /// Number of the block. - block_number: validator::BlockNumber, - /// Block returned by the block syncing actor. - response: oneshot::Sender, - }, } /// All the messages that the Network actor sends to other actors. 
/// All the messages that the Network actor sends to other actors. diff --git a/node/actors/network/src/mux/mod.rs index 03068117..6cbd9e22 100644 --- a/node/actors/network/src/mux/mod.rs +++ b/node/actors/network/src/mux/mod.rs @@ -81,6 +81,7 @@ use crate::{frame, noise::bytes}; use anyhow::Context as _; use std::{collections::BTreeMap, sync::Arc}; use zksync_concurrency::{ctx, ctx::channel, io, scope, sync}; +use zksync_protobuf::ProtoFmt as _; mod config; mod handshake; @@ -273,12 +274,12 @@ impl Mux { ) -> Result<(), RunError> { self.verify().map_err(RunError::Config)?; let (mut read, mut write) = io::split(transport); - let handshake = scope::run!(ctx, |ctx, s| async { + let handshake: Handshake = scope::run!(ctx, |ctx, s| async { s.spawn(async { let h = self.handshake(); frame::send_proto(ctx, &mut write, &h).await }); - frame::recv_proto::<Handshake>(ctx, &mut read).await + frame::recv_proto(ctx, &mut read, Handshake::max_size()).await }) .await .map_err(RunError::Protocol)?; diff --git a/node/actors/network/src/mux/reusable_stream.rs index a0fe6a14..882040db 100644 --- a/node/actors/network/src/mux/reusable_stream.rs +++ b/node/actors/network/src/mux/reusable_stream.rs @@ -282,7 +282,7 @@ impl ReusableStream { let read = recv_open_task.join(ctx).await?; (read, reservation) } - _ => unreachable!("bad StreakKind"), + _ => unreachable!("bad StreamKind"), }; let (read_lock, new_read_receiver) = sync::ExclusiveLock::new(read); diff --git a/node/actors/network/src/mux/tests/mod.rs index 3c91c8ee..e797a917 100644 --- a/node/actors/network/src/mux/tests/mod.rs +++ b/node/actors/network/src/mux/tests/mod.rs @@ -10,6 +10,8 @@ use std::{ }; use zksync_concurrency::{ctx, scope, testonly::abort_on_panic}; use zksync_consensus_utils::no_copy::NoCopy; +use zksync_protobuf::ProtoFmt as _; + mod proto; fn assert_partition(sets: &[u16]) { @@ -120,7 +122,9 @@ async fn run_server( assert!(count.fetch_add(1, Ordering::SeqCst) < queue.max_streams as usize); s.spawn(async { let mut stream = stream; - while let Ok((req, _)) = frame::mux_recv_proto::<Req>(ctx, &mut stream.read).await { + while let Ok((req, _)) = + frame::mux_recv_proto::<Req>(ctx, &mut stream.read, Req::max_size()).await + { let resp = Resp { output: rpc_handler(req.0), capability_id: cap, @@ -160,7 +164,9 @@ async fn run_client( rng.fill(&mut req[..]); frame::mux_send_proto(ctx, &mut stream.write, &Req(req.clone())).await?; stream.write.flush(ctx).await?; - let (resp, _) = frame::mux_recv_proto::<Resp>(ctx, &mut stream.read).await?; + let (resp, _) = + frame::mux_recv_proto::<Resp>(ctx, &mut stream.read, Resp::max_size()) + .await?; assert_eq!(resp.output, rpc_handler(req)); assert_eq!(resp.capability_id, cap); } diff --git a/node/actors/network/src/preface.rs index a8a8ae19..78f04ea7 100644 --- a/node/actors/network/src/preface.rs +++ b/node/actors/network/src/preface.rs @@ -92,8 +92,8 @@ pub(crate) async fn accept( mut stream: metrics::MeteredStream, ) -> anyhow::Result<(noise::Stream, Endpoint)> { let ctx = &ctx.with_timeout(TIMEOUT); - let _: Encryption = frame::recv_proto(ctx, &mut stream).await?; + let _: Encryption = frame::recv_proto(ctx, &mut stream, Encryption::max_size()).await?; let mut stream = noise::Stream::server_handshake(ctx, stream).await?; - let endpoint = frame::recv_proto(ctx, &mut stream).await?; + let endpoint = frame::recv_proto(ctx, &mut stream, Endpoint::max_size()).await?; Ok((stream, endpoint)) } diff --git
a/node/actors/network/src/proto/gossip.proto b/node/actors/network/src/proto/gossip.proto index 850191d1..976ba1b8 100644 --- a/node/actors/network/src/proto/gossip.proto +++ b/node/actors/network/src/proto/gossip.proto @@ -11,9 +11,7 @@ message Handshake { optional bool is_static = 2; } -message SyncValidatorAddrsReq {} - -message SyncValidatorAddrsResp { +message PushValidatorAddrs { // Signed roles.validator.Msg.net_address. repeated roles.validator.Signed net_addresses = 1; } @@ -21,16 +19,11 @@ message SyncValidatorAddrsResp { // State of the local block store. // A node is expected to store a continuous range of blocks at all times // and actively fetch newest blocks. -message SyncState { +message PushBlockStoreState { // First L2 block that the node has locally. - optional roles.validator.CommitQC first_stored_block = 1; + optional roles.validator.CommitQC first = 1; // Last L2 block that the node has locally. - optional roles.validator.CommitQC last_stored_block = 2; -} - -// Response to `SyncState` acknowledging its processing. -message SyncStateResponse { - // intentionally empty + optional roles.validator.CommitQC last = 2; } // Asks the server to send an L2 block (including its transactions). @@ -39,28 +32,7 @@ message GetBlockRequest { optional uint64 number = 1; } -// Response to a `GetBlockRequest` containing a block or a reason it cannot be retrieved. +// Response to a `GetBlockRequest`. message GetBlockResponse { - // Errors returned from a `GetBlockResponse`. - // - // Note that these errors don't include network-level errors, only app-level ones. - message Error { - // Reason processing the request has failed. - optional ErrorReason reason = 1; - } - - // Reason processing a `GetBlockRequest` has failed. - enum ErrorReason { - // Transient error: the node doesn't have the requested L2 block, - // but plans to get it in the future by syncing. - NOT_SYNCED = 0; - } - - // Result of processing a `GetBlockRequest`. - oneof result { - // The request was successfully processed. - roles.validator.FinalBlock block = 1; - // The request resulted in an error. - Error error = 2; - } + optional roles.validator.FinalBlock block = 1; // optional; missing if block is not available } diff --git a/node/actors/network/src/rpc/consensus.rs b/node/actors/network/src/rpc/consensus.rs index f266f2b6..498cd054 100644 --- a/node/actors/network/src/rpc/consensus.rs +++ b/node/actors/network/src/rpc/consensus.rs @@ -43,10 +43,6 @@ impl ProtoFmt for Req { msg: Some(self.0.build()), } } - - fn max_size() -> usize { - zksync_protobuf::MB - } } impl ProtoFmt for Resp { @@ -59,8 +55,4 @@ impl ProtoFmt for Resp { fn build(&self) -> Self::Proto { Self::Proto {} } - - fn max_size() -> usize { - zksync_protobuf::kB - } } diff --git a/node/actors/network/src/rpc/get_block.rs b/node/actors/network/src/rpc/get_block.rs new file mode 100644 index 00000000..c4996168 --- /dev/null +++ b/node/actors/network/src/rpc/get_block.rs @@ -0,0 +1,62 @@ +//! RPC for fetching a block from peer. +use crate::{mux, proto::gossip as proto}; +use anyhow::Context; +use zksync_concurrency::{limiter, time}; +use zksync_consensus_roles::validator::{BlockNumber, FinalBlock}; +use zksync_protobuf::{read_optional, ProtoFmt}; + +/// `get_block` RPC. 
+#[derive(Debug)] +pub(crate) struct Rpc; + +// TODO: determine more precise `INFLIGHT` / `RATE` values as a result of load testing +impl super::Rpc for Rpc { + const CAPABILITY_ID: mux::CapabilityId = 4; + const INFLIGHT: u32 = 5; + const RATE: limiter::Rate = limiter::Rate { + burst: 10, + refresh: time::Duration::milliseconds(100), + }; + const METHOD: &'static str = "get_block"; + + type Req = Req; + type Resp = Resp; +} + +/// Asks the server to send a block (including its transactions). +#[derive(Debug, PartialEq)] +pub(crate) struct Req(pub(crate) BlockNumber); + +impl ProtoFmt for Req { + type Proto = proto::GetBlockRequest; + + fn read(message: &Self::Proto) -> anyhow::Result<Self> { + let number = message.number.context("number")?; + Ok(Self(BlockNumber(number))) + } + + fn build(&self) -> Self::Proto { + let BlockNumber(number) = self.0; + Self::Proto { + number: Some(number), + } + } +} + +/// Response to a [`GetBlockRequest`] containing the block, if available. +#[derive(Debug, PartialEq)] +pub(crate) struct Resp(pub(crate) Option<FinalBlock>); + +impl ProtoFmt for Resp { + type Proto = proto::GetBlockResponse; + + fn read(r: &Self::Proto) -> anyhow::Result<Self> { + Ok(Self(read_optional(&r.block).context("block")?)) + } + + fn build(&self) -> Self::Proto { + Self::Proto { + block: self.0.as_ref().map(ProtoFmt::build), + } + } +}
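A quick round-trip sketch for the new messages (test-style; relies on the `zksync_protobuf::encode`/`decode` helpers that the framing code above already uses):

    // Sketch: ProtoFmt round-trip for the get_block messages. A missing
    // block encodes as an absent optional field, which now carries the
    // whole "not available" signal on the wire.
    let req = Req(BlockNumber(7));
    let bytes = zksync_protobuf::encode(&req);
    assert_eq!(req, zksync_protobuf::decode::<Req>(&bytes).unwrap());

    let resp = Resp(None);
    let bytes = zksync_protobuf::encode(&resp);
    assert_eq!(resp, zksync_protobuf::decode::<Resp>(&bytes).unwrap());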
diff --git a/node/actors/network/src/rpc/mod.rs index 77f9a52a..d98a08b3 100644 --- a/node/actors/network/src/rpc/mod.rs +++ b/node/actors/network/src/rpc/mod.rs @@ -22,10 +22,11 @@ use std::{collections::BTreeMap, sync::Arc}; use zksync_concurrency::{ctx, io, limiter, metrics::LatencyHistogramExt as _, scope}; pub(crate) mod consensus; +pub(crate) mod get_block; mod metrics; pub(crate) mod ping; -pub(crate) mod sync_blocks; -pub(crate) mod sync_validator_addrs; +pub(crate) mod push_block_store_state; +pub(crate) mod push_validator_addrs; #[cfg(test)] pub(crate) mod testonly; #[cfg(test)] @@ -82,24 +83,31 @@ pub(crate) struct ReservedCall<'a, R: Rpc> { impl<'a, R: Rpc> ReservedCall<'a, R> { /// Performs the call. - pub(crate) async fn call(self, ctx: &ctx::Ctx, req: &R::Req) -> anyhow::Result<R::Resp> { + pub(crate) async fn call( + self, + ctx: &ctx::Ctx, + req: &R::Req, + max_resp_size: usize, + ) -> anyhow::Result<R::Resp> { let send_time = ctx.now(); let mut stream = self.stream.open(ctx).await??; drop(self.permit); let res = async { let metric_labels = CallType::Client.to_labels::<R>(req); let _guard = RPC_METRICS.inflight[&metric_labels].inc_guard(1); - let msg_size = frame::mux_send_proto(ctx, &mut stream.write, req).await?; + let msg_size = frame::mux_send_proto(ctx, &mut stream.write, req) + .await + .context("mux_send_proto(req)")?; RPC_METRICS.message_size[&CallType::ReqSent.to_labels::<R>(req)].observe(msg_size); drop(stream.write); - frame::mux_recv_proto(ctx, &mut stream.read).await + frame::mux_recv_proto(ctx, &mut stream.read, max_resp_size).await } .await; let now = ctx.now(); let metric_labels = CallLatencyType::ClientSendRecv.to_labels::<R>(req, &res); RPC_METRICS.latency[&metric_labels].observe_latency(now - send_time); - let (res, msg_size) = res?; + let (res, msg_size) = res.context(R::METHOD)?; RPC_METRICS.message_size[&CallType::RespRecv.to_labels::<R>(req)].observe(msg_size); Ok(res) } @@ -126,14 +134,10 @@ impl<R: Rpc> Client<R> { pub(crate) async fn reserve<'a>( &'a self, ctx: &'a ctx::Ctx, - ) -> anyhow::Result<ReservedCall<'a, R>> { + ) -> ctx::OrCanceled<ReservedCall<'a, R>> { let reserve_time = ctx.now(); - let permit = self.limiter.acquire(ctx, 1).await.context("limiter")?; - let stream = self - .queue - .reserve(ctx) - .await - .context("StreamQueue::open()")?; + let permit = self.limiter.acquire(ctx, 1).await?; + let stream = self.queue.reserve(ctx).await?; RPC_METRICS.call_reserve_latency[&R::METHOD].observe_latency(ctx.now() - reserve_time); Ok(ReservedCall { stream, @@ -143,8 +147,17 @@ } /// Performs an RPC. - pub(crate) async fn call(&self, ctx: &ctx::Ctx, req: &R::Req) -> anyhow::Result<R::Resp> { - self.reserve(ctx).await?.call(ctx, req).await + pub(crate) async fn call( + &self, + ctx: &ctx::Ctx, + req: &R::Req, + max_resp_size: usize, + ) -> ctx::Result<R::Resp> { + Ok(self + .reserve(ctx) + .await? + .call(ctx, req, max_resp_size) + .await?) } } @@ -153,6 +166,9 @@ pub(crate) trait Handler<R: Rpc>: Sync + Send { /// Processes the request and returns the response. async fn handle(&self, ctx: &ctx::Ctx, req: R::Req) -> anyhow::Result<R::Resp>; + /// Upper bound on the proto-encoded request size. + /// It protects us from buffering maliciously large messages. + fn max_req_size(&self) -> usize; } /// Internal: an RPC server which wraps the Handler. @@ -182,8 +198,12 @@ impl<R: Rpc, H: Handler<R>> ServerTrait for Server<R, H> { drop(permit); let res = async { let recv_time = ctx.now(); - let (req, msg_size) = - frame::mux_recv_proto::<R::Req>(ctx, &mut stream.read).await?; + let (req, msg_size) = frame::mux_recv_proto::<R::Req>( + ctx, + &mut stream.read, + self.handler.max_req_size(), + ) + .await?; let size_labels = CallType::ReqRecv.to_labels::<R>(&req); let resp_size_labels = CallType::RespSent.to_labels::<R>(&req);
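Call sites now own the response-size bound, since only the caller knows how much it can afford to buffer (e.g. `max_block_size` plus a kilobyte of envelope slack for `get_block`). A sketch of the call shape under the new API (the client still has to be registered on an `rpc::Service` for the call to ever complete):

    // Sketch: issuing an RPC with an explicit response-size cap.
    let client = rpc::Client::<rpc::ping::Rpc>::new(ctx);
    let req = rpc::ping::Req([0u8; 32]);
    // kB is the largest response we are willing to buffer for this call.
    let resp = client.call(ctx, &req, zksync_protobuf::kB).await?;
    assert_eq!(req.0, resp.0);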
diff --git a/node/actors/network/src/rpc/ping.rs index 8bd2b79c..5307eb1c 100644 --- a/node/actors/network/src/rpc/ping.rs +++ b/node/actors/network/src/rpc/ping.rs @@ -3,7 +3,7 @@ use crate::{mux, proto::ping as proto, rpc::Rpc as _}; use anyhow::Context as _; use rand::Rng; use zksync_concurrency::{ctx, limiter, time}; -use zksync_protobuf::{required, ProtoFmt}; +use zksync_protobuf::{kB, required, ProtoFmt}; /// Ping RPC. pub(crate) struct Rpc; @@ -26,6 +26,9 @@ pub(crate) struct Server; #[async_trait::async_trait] impl super::Handler<Rpc> for Server { + fn max_req_size(&self) -> usize { + kB + } async fn handle(&self, _ctx: &ctx::Ctx, req: Req) -> anyhow::Result<Resp> { Ok(Resp(req.0)) } @@ -44,7 +47,7 @@ impl super::Client<Rpc> { ctx.sleep(Rpc::RATE.refresh).await?; let ctx = &ctx.with_timeout(timeout); let req = Req(ctx.rng().gen()); - let resp = self.call(ctx, &req).await.context("ping")?; + let resp = self.call(ctx, &req, kB).await.context("ping")?; if req.0 != resp.0 { anyhow::bail!("bad ping response"); } @@ -63,9 +66,6 @@ pub(crate) struct Resp(pub(crate) [u8; 32]); impl ProtoFmt for Req { type Proto = proto::PingReq; - fn max_size() -> usize { - zksync_protobuf::kB - } fn read(r: &Self::Proto) -> anyhow::Result<Self> { Ok(Self(required(&r.data)?[..].try_into()?)) } @@ -78,9 +78,6 @@ impl ProtoFmt for Resp { type Proto = proto::PingResp; - fn max_size() -> usize { - zksync_protobuf::kB - } fn read(r: &Self::Proto) -> anyhow::Result<Self> { Ok(Self(required(&r.data)?[..].try_into()?)) } diff --git a/node/actors/network/src/rpc/push_block_store_state.rs new file mode 100644 index 00000000..ef340c25 --- /dev/null +++ b/node/actors/network/src/rpc/push_block_store_state.rs @@ -0,0 +1,45 @@ +//! RPC for notifying a peer about our BlockStore state. +use crate::{mux, proto::gossip as proto}; +use anyhow::Context; +use zksync_concurrency::{limiter, time}; +use zksync_consensus_storage::BlockStoreState; +use zksync_protobuf::{read_required, ProtoFmt}; + +/// PushBlockStoreState RPC. +#[derive(Debug)] +pub(crate) struct Rpc; + +impl super::Rpc for Rpc { + const CAPABILITY_ID: mux::CapabilityId = 3; + const INFLIGHT: u32 = 1; + const RATE: limiter::Rate = limiter::Rate { + burst: 2, + refresh: time::Duration::milliseconds(500), + }; + const METHOD: &'static str = "push_block_store_state"; + + type Req = Req; + type Resp = (); +} + +/// Contains the freshest state of the sender's block store. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct Req(pub(crate) BlockStoreState); + +impl ProtoFmt for Req { + type Proto = proto::PushBlockStoreState; + + fn read(message: &Self::Proto) -> anyhow::Result<Self> { + Ok(Self(BlockStoreState { + first: read_required(&message.first).context("first")?, + last: read_required(&message.last).context("last")?, + })) + } + + fn build(&self) -> Self::Proto { + Self::Proto { + first: Some(self.0.first.build()), + last: Some(self.0.last.build()), + } + } +}
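The `RATE` constants read as token buckets (my reading of `limiter::Rate` semantics, not code from this patch): `burst` bounds a spike, and one permit returns per `refresh` interval, so `push_block_store_state` sustains at most two calls per second. A rough sketch of that accounting:

    // Sketch: token-bucket accounting implied by limiter::Rate (assumed).
    struct Bucket { tokens: f64, burst: f64, refresh_secs: f64 }

    impl Bucket {
        // Try to take one permit, `elapsed` seconds after the last attempt.
        fn try_acquire(&mut self, elapsed: f64) -> bool {
            self.tokens = (self.tokens + elapsed / self.refresh_secs).min(self.burst);
            if self.tokens >= 1.0 { self.tokens -= 1.0; true } else { false }
        }
    }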
 pub(crate) struct Rpc;
 
 impl super::Rpc for Rpc {
@@ -16,40 +16,18 @@ impl super::Rpc for Rpc {
         burst: 1,
         refresh: time::Duration::seconds(5),
     };
-    const METHOD: &'static str = "sync_validator_addrs";
+    const METHOD: &'static str = "push_validator_addrs";
 
     type Req = Req;
-    type Resp = Resp;
+    type Resp = ();
 }
 
-/// Ask server to send ValidatorAddrs update.
+/// Contains a batch of new ValidatorAddrs that the sender has learned about.
 #[derive(Debug, Clone, PartialEq, Eq)]
-pub(crate) struct Req;
-
-/// Response to SyncValidatorAddrsReq.
-/// Contains a batch of new ValidatorAddrs that server
-/// learned since the client's last SyncValidatorAddrs Rpc.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub(crate) struct Resp(pub(crate) Vec<Arc<validator::Signed<validator::NetAddress>>>);
+pub(crate) struct Req(pub(crate) Vec<Arc<validator::Signed<validator::NetAddress>>>);
 
 impl ProtoFmt for Req {
-    type Proto = proto::SyncValidatorAddrsReq;
-
-    fn read(_r: &Self::Proto) -> anyhow::Result<Self> {
-        Ok(Self)
-    }
-
-    fn build(&self) -> Self::Proto {
-        Self::Proto {}
-    }
-
-    fn max_size() -> usize {
-        zksync_protobuf::kB
-    }
-}
-
-impl ProtoFmt for Resp {
-    type Proto = proto::SyncValidatorAddrsResp;
+    type Proto = proto::PushValidatorAddrs;
 
     fn read(r: &Self::Proto) -> anyhow::Result<Self> {
         let mut addrs = vec![];
@@ -66,8 +44,4 @@
             net_addresses: self.0.iter().map(|a| ProtoFmt::build(a.as_ref())).collect(),
         }
     }
-
-    fn max_size() -> usize {
-        zksync_protobuf::MB
-    }
 }
diff --git a/node/actors/network/src/rpc/sync_blocks.rs b/node/actors/network/src/rpc/sync_blocks.rs
deleted file mode 100644
index 827eac50..00000000
--- a/node/actors/network/src/rpc/sync_blocks.rs
+++ /dev/null
@@ -1,179 +0,0 @@
-//! Defines RPC for synchronizing blocks.
-use crate::{io, mux, proto::gossip as proto};
-use anyhow::Context;
-use zksync_concurrency::{limiter, time};
-use zksync_consensus_roles::validator::{BlockNumber, FinalBlock};
-use zksync_protobuf::{read_required, ProtoFmt};
-
-/// `get_sync_state` RPC.
-#[derive(Debug)]
-pub(crate) struct PushSyncStateRpc;
-
-impl super::Rpc for PushSyncStateRpc {
-    const CAPABILITY_ID: mux::CapabilityId = 3;
-    const INFLIGHT: u32 = 1;
-    const RATE: limiter::Rate = limiter::Rate {
-        burst: 1,
-        refresh: time::Duration::seconds(5),
-    };
-    const METHOD: &'static str = "push_sync_state";
-
-    type Req = io::SyncState;
-    type Resp = SyncStateResponse;
-}
-
-impl ProtoFmt for io::SyncState {
-    type Proto = proto::SyncState;
-
-    fn read(message: &Self::Proto) -> anyhow::Result<Self> {
-        Ok(Self {
-            first_stored_block: read_required(&message.first_stored_block)
-                .context("first_stored_block")?,
-            last_stored_block: read_required(&message.last_stored_block)
-                .context("last_stored_block")?,
-        })
-    }
-
-    fn build(&self) -> Self::Proto {
-        Self::Proto {
-            first_stored_block: Some(self.first_stored_block.build()),
-            last_stored_block: Some(self.last_stored_block.build()),
-        }
-    }
-
-    fn max_size() -> usize {
-        // TODO: estimate maximum size more precisely
-        100 * zksync_protobuf::kB
-    }
-}
-
-/// Response to [`SyncState`] acknowledging its processing.
-#[derive(Debug, Clone, Copy)]
-pub(crate) struct SyncStateResponse;
-
-impl ProtoFmt for SyncStateResponse {
-    type Proto = proto::SyncStateResponse;
-
-    fn read(_message: &Self::Proto) -> anyhow::Result<Self> {
-        Ok(Self)
-    }
-
-    fn build(&self) -> Self::Proto {
-        Self::Proto {}
-    }
-
-    fn max_size() -> usize {
-        zksync_protobuf::kB
-    }
-}
-
-/// `get_block` RPC.
-#[derive(Debug)] -pub(crate) struct GetBlockRpc; - -// TODO: determine more precise `INFLIGHT` / `RATE` values as a result of load testing -impl super::Rpc for GetBlockRpc { - const CAPABILITY_ID: mux::CapabilityId = 4; - const INFLIGHT: u32 = 5; - const RATE: limiter::Rate = limiter::Rate { - burst: 10, - refresh: time::Duration::milliseconds(100), - }; - const METHOD: &'static str = "get_block"; - - type Req = GetBlockRequest; - type Resp = GetBlockResponse; -} - -/// Asks the server to send a block (including its transactions). -#[derive(Debug)] -pub(crate) struct GetBlockRequest(pub(crate) BlockNumber); - -impl ProtoFmt for GetBlockRequest { - type Proto = proto::GetBlockRequest; - - fn read(message: &Self::Proto) -> anyhow::Result { - let number = message.number.context("number")?; - Ok(Self(BlockNumber(number))) - } - - fn build(&self) -> Self::Proto { - let BlockNumber(number) = self.0; - Self::Proto { - number: Some(number), - } - } - - fn max_size() -> usize { - zksync_protobuf::kB - } -} - -impl ProtoFmt for io::GetBlockError { - type Proto = proto::get_block_response::Error; - - fn read(message: &Self::Proto) -> anyhow::Result { - use proto::get_block_response::ErrorReason; - - let reason = message.reason.context("missing reason")?; - let reason = ErrorReason::try_from(reason).context("reason")?; - Ok(match reason { - ErrorReason::NotSynced => Self::NotSynced, - }) - } - - fn build(&self) -> Self::Proto { - use proto::get_block_response::ErrorReason; - - Self::Proto { - reason: Some(match self { - Self::NotSynced => ErrorReason::NotSynced as i32, - }), - } - } - - fn max_size() -> usize { - zksync_protobuf::kB - } -} - -/// Response to a [`GetBlockRequest`] containing a block or a reason it cannot be retrieved. -#[derive(Debug)] -pub(crate) struct GetBlockResponse(pub(crate) io::GetBlockResponse); - -impl From for GetBlockResponse { - fn from(response: io::GetBlockResponse) -> Self { - Self(response) - } -} - -impl ProtoFmt for GetBlockResponse { - type Proto = proto::GetBlockResponse; - - fn read(message: &Self::Proto) -> anyhow::Result { - use proto::get_block_response::Result as GetBlockResult; - - let result = message.result.as_ref().context("missing result")?; - let result = match result { - GetBlockResult::Block(block) => Ok(FinalBlock::read(block).context("block")?), - GetBlockResult::Error(error) => Err(io::GetBlockError::read(error).context("error")?), - }; - Ok(Self(result)) - } - - fn build(&self) -> Self::Proto { - use proto::get_block_response::Result as GetBlockResult; - - let result = match &self.0 { - Ok(block) => GetBlockResult::Block(block.build()), - Err(err) => GetBlockResult::Error(err.build()), - }; - Self::Proto { - result: Some(result), - } - } - - fn max_size() -> usize { - zksync_protobuf::MB - } -} diff --git a/node/actors/network/src/rpc/testonly.rs b/node/actors/network/src/rpc/testonly.rs index d176790e..f3f4ca7c 100644 --- a/node/actors/network/src/rpc/testonly.rs +++ b/node/actors/network/src/rpc/testonly.rs @@ -2,23 +2,31 @@ //! Implementations of Distribution are supposed to generate realistic data, //! but in fact they are "best-effort realistic" - they might need an upgrade, //! if tests require stricter properties of the generated data. 
-use super::{consensus, sync_validator_addrs, Arc};
+use crate::rpc;
 use rand::{
     distributions::{Distribution, Standard},
     Rng,
 };
+use std::sync::Arc;
 use zksync_consensus_roles::validator;
+use zksync_consensus_storage::BlockStoreState;
 
-impl Distribution<consensus::Req> for Standard {
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> consensus::Req {
-        consensus::Req(rng.gen())
+impl Distribution<rpc::consensus::Req> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> rpc::consensus::Req {
+        rpc::consensus::Req(rng.gen())
     }
 }
 
-impl Distribution<sync_validator_addrs::Resp> for Standard {
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> sync_validator_addrs::Resp {
+impl Distribution<rpc::consensus::Resp> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> rpc::consensus::Resp {
+        rpc::consensus::Resp
+    }
+}
+
+impl Distribution<rpc::push_validator_addrs::Req> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> rpc::push_validator_addrs::Req {
         let n = rng.gen_range(5..10);
-        sync_validator_addrs::Resp(
+        rpc::push_validator_addrs::Req(
             (0..n)
                 .map(|_| {
                     let key: validator::SecretKey = rng.gen();
@@ -29,3 +37,24 @@
         )
     }
 }
+
+impl Distribution<rpc::push_block_store_state::Req> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> rpc::push_block_store_state::Req {
+        rpc::push_block_store_state::Req(BlockStoreState {
+            first: rng.gen(),
+            last: rng.gen(),
+        })
+    }
+}
+
+impl Distribution<rpc::get_block::Req> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> rpc::get_block::Req {
+        rpc::get_block::Req(rng.gen())
+    }
+}
+
+impl Distribution<rpc::get_block::Resp> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> rpc::get_block::Resp {
+        rpc::get_block::Resp(Some(rng.gen()))
+    }
+}
diff --git a/node/actors/network/src/rpc/tests.rs b/node/actors/network/src/rpc/tests.rs
index 04f28c2a..96020721 100644
--- a/node/actors/network/src/rpc/tests.rs
+++ b/node/actors/network/src/rpc/tests.rs
@@ -6,14 +6,16 @@ use std::{
     sync::atomic::{AtomicU64, Ordering},
 };
 use zksync_concurrency::{ctx, testonly::abort_on_panic, time};
-use zksync_protobuf::testonly::test_encode_random;
+use zksync_protobuf::{kB, testonly::test_encode_random};
 
 /// CAPABILITY_ID should uniquely identify the RPC.
 #[test]
 fn test_capability_rpc_correspondence() {
     let ids = [
         consensus::Rpc::CAPABILITY_ID,
-        sync_validator_addrs::Rpc::CAPABILITY_ID,
+        push_validator_addrs::Rpc::CAPABILITY_ID,
+        push_block_store_state::Rpc::CAPABILITY_ID,
+        get_block::Rpc::CAPABILITY_ID,
         ping::Rpc::CAPABILITY_ID,
     ];
     assert_eq!(ids.len(), HashSet::from(ids).len());
@@ -22,8 +24,12 @@ fn test_capability_rpc_correspondence() {
 #[test]
 fn test_schema_encode_decode() {
     let rng = &mut ctx::test_root(&ctx::RealClock).rng();
-    test_encode_random::<_, consensus::Req>(rng);
-    test_encode_random::<_, sync_validator_addrs::Resp>(rng);
+    test_encode_random::<consensus::Req>(rng);
+    test_encode_random::<consensus::Resp>(rng);
+    test_encode_random::<push_validator_addrs::Req>(rng);
+    test_encode_random::<push_block_store_state::Req>(rng);
+    test_encode_random::<get_block::Req>(rng);
+    test_encode_random::<get_block::Resp>(rng);
 }
 
 fn expected(res: Result<(), mux::RunError>) -> Result<(), mux::RunError> {
@@ -49,13 +55,13 @@ async fn test_ping() {
     });
     for _ in 0..ping::Rpc::RATE.burst {
         let req = ping::Req(ctx.rng().gen());
-        let resp = client.call(ctx, &req).await?;
+        let resp = client.call(ctx, &req, kB).await?;
         assert_eq!(req.0, resp.0);
     }
     let now = ctx.now();
     clock.set_advance_on_sleep();
     let req = ping::Req(ctx.rng().gen());
-    let resp = client.call(ctx, &req).await?;
+    let resp = client.call(ctx, &req, kB).await?;
     assert_eq!(req.0, resp.0);
     assert!(ctx.now() >= now + ping::Rpc::RATE.refresh);
     Ok(())
@@ -74,6 +80,9 @@ const PING_TIMEOUT: time::Duration = time::Duration::seconds(3);
 
 #[async_trait::async_trait]
 impl Handler<ping::Rpc> for PingServer {
+    fn max_req_size(&self) -> usize {
+        kB
+    }
     async fn handle(&self, ctx: &ctx::Ctx, req: ping::Req) -> anyhow::Result<ping::Resp> {
         if self.pings.fetch_add(1, Ordering::Relaxed) >= PING_COUNT {
             self.clock.advance(PING_TIMEOUT);
@@ -143,6 +152,9 @@
 struct ExampleServer;
 
 #[async_trait::async_trait]
 impl Handler<ExampleRpc> for ExampleServer {
+    fn max_req_size(&self) -> usize {
+        kB
+    }
     async fn handle(&self, ctx: &ctx::Ctx, _req: ()) -> anyhow::Result<()> {
         ctx.canceled().await;
         anyhow::bail!("terminated");
diff --git a/node/actors/network/src/state.rs b/node/actors/network/src/state.rs
index ae9269c6..545281b9 100644
--- a/node/actors/network/src/state.rs
+++ b/node/actors/network/src/state.rs
@@ -1,10 +1,11 @@
 //! Network actor maintaining a pool of outbound and inbound connections to other nodes.
 use super::{consensus, event::Event, gossip, metrics, preface};
-use crate::io::{InputMessage, OutputMessage, SyncState};
+use crate::io::{InputMessage, OutputMessage};
 use anyhow::Context as _;
 use std::sync::Arc;
-use zksync_concurrency::{ctx, ctx::channel, net, scope, sync::watch};
+use zksync_concurrency::{ctx, ctx::channel, net, scope};
 use zksync_consensus_roles::validator;
+use zksync_consensus_storage::BlockStore;
 use zksync_consensus_utils::pipe::ActorPipe;
 
 /// Network actor config.
@@ -20,6 +21,10 @@ pub struct Config {
     pub gossip: gossip::Config,
     /// Consensus network config. If not present, the node will not participate in the consensus network.
     pub consensus: Option<consensus::Config>,
+    /// Maximal size of the proto-encoded `validator::FinalBlock` in bytes.
+    pub max_block_size: usize,
+    /// Enables pinging the peers to make sure that they are alive.
+    pub enable_pings: bool,
 }
 
 /// Part of configuration shared among network modules.
@@ -31,6 +36,10 @@ pub(crate) struct SharedConfig {
     /// - client should establish outbound connections to.
     /// - server should accept inbound connections from (1 per validator).
     pub(crate) validators: validator::ValidatorSet,
+    /// Enables pinging the peers to make sure that they are alive.
+    pub(crate) enable_pings: bool,
+    /// Maximal size of the proto-encoded `validator::FinalBlock` in bytes.
+    pub(crate) max_block_size: usize,
 }
 
 /// State of the network actor observable outside of the actor.
@@ -52,20 +61,22 @@ impl State {
     /// Call `run_network` to run the actor.
     pub fn new(
         cfg: Config,
+        block_store: Arc<BlockStore>,
         events: Option<channel::UnboundedSender<Event>>,
-        sync_state: Option<watch::Receiver<SyncState>>,
     ) -> anyhow::Result<Arc<Self>> {
         let consensus = cfg
             .consensus
             .map(|consensus_cfg| consensus::State::new(consensus_cfg, &cfg.validators))
             .transpose()?;
         let this = Self {
-            gossip: gossip::State::new(cfg.gossip, sync_state),
+            gossip: gossip::State::new(cfg.gossip, block_store),
             consensus,
             events,
             cfg: SharedConfig {
                 server_addr: cfg.server_addr,
                 validators: cfg.validators,
+                enable_pings: cfg.enable_pings,
+                max_block_size: cfg.max_block_size,
             },
         };
         Ok(Arc::new(this))
@@ -130,18 +141,9 @@ pub async fn run_network(
                     let (stream, endpoint) = preface::accept(ctx, stream).await?;
                     match endpoint {
                         preface::Endpoint::ConsensusNet => {
-                            if let Some(consensus_state) = &state.consensus {
-                                consensus::run_inbound_stream(
-                                    ctx,
-                                    consensus_state,
-                                    &pipe.send,
-                                    stream,
-                                )
+                            consensus::run_inbound_stream(ctx, &state, &pipe.send, stream)
                                 .await
                                 .context("consensus::run_inbound_stream()")
-                            } else {
-                                anyhow::bail!("Node does not accept consensus network connections");
-                            }
                         }
                         preface::Endpoint::GossipNet => {
                             gossip::run_inbound_stream(ctx, &state, &pipe.send, stream)
diff --git a/node/actors/network/src/testonly.rs b/node/actors/network/src/testonly.rs
index d666feab..a1c76721 100644
--- a/node/actors/network/src/testonly.rs
+++ b/node/actors/network/src/testonly.rs
@@ -1,6 +1,6 @@
 //! Testonly utilities.
 #![allow(dead_code)]
-use crate::{consensus, event::Event, gossip, io::SyncState, Config, State};
+use crate::{consensus, event::Event, gossip, Config, State};
 use rand::Rng;
 use std::{
     collections::{HashMap, HashSet},
@@ -9,10 +9,12 @@ use std::{
 use zksync_concurrency::{
     ctx,
     ctx::channel,
-    io, net,
-    sync::{self, watch},
+    io, net, scope,
+    sync::{self},
 };
-use zksync_consensus_roles::validator;
+use zksync_consensus_roles::{node, validator};
+use zksync_consensus_storage::BlockStore;
+use zksync_consensus_utils::pipe;
 
 /// Synchronously forwards data from one stream to another.
 pub(crate) async fn forward(
@@ -46,60 +48,109 @@ pub struct Instance {
     pub(crate) state: Arc<State>,
     /// Stream of events.
     pub(crate) events: channel::UnboundedReceiver<Event>,
+    /// Termination signal that can be sent to the node.
+    pub(crate) terminate: channel::Sender<()>,
+    /// Dispatcher end of the network pipe.
+    pub(crate) pipe: pipe::DispatcherPipe<io::InputMessage, io::OutputMessage>,
 }
 
-impl Instance {
-    /// Construct configs for `n` validators of the consensus.
- pub fn new_configs(rng: &mut R, n: usize, gossip_peers: usize) -> Vec { - let keys: Vec = (0..n).map(|_| rng.gen()).collect(); - let validators = validator::ValidatorSet::new(keys.iter().map(|k| k.public())).unwrap(); - let configs = keys.iter().map(|key| { - let addr = net::tcp::testonly::reserve_listener(); - Config { - server_addr: addr, - validators: validators.clone(), - consensus: Some(consensus::Config { - key: key.clone(), - public_addr: *addr, - }), - gossip: gossip::Config { - key: rng.gen(), - dynamic_inbound_limit: n as u64, - static_inbound: HashSet::default(), - static_outbound: HashMap::default(), - enable_pings: true, - }, - } - }); - let mut cfgs: Vec<_> = configs.collect(); - - for i in 0..cfgs.len() { - for j in 0..gossip_peers { - let j = (i + j + 1) % n; - let peer = cfgs[j].gossip.key.public(); - let addr = *cfgs[j].server_addr; - cfgs[i].gossip.static_outbound.insert(peer, addr); - } +/// Construct configs for `n` validators of the consensus. +pub fn new_configs( + rng: &mut R, + setup: &validator::testonly::GenesisSetup, + gossip_peers: usize, +) -> Vec { + let configs = setup.keys.iter().map(|key| { + let addr = net::tcp::testonly::reserve_listener(); + Config { + server_addr: addr, + validators: setup.validator_set(), + // Pings are disabled in tests by default to avoid dropping connections + // due to timeouts. + enable_pings: false, + consensus: Some(consensus::Config { + key: key.clone(), + public_addr: *addr, + }), + gossip: gossip::Config { + key: rng.gen(), + dynamic_inbound_limit: setup.keys.len(), + static_inbound: HashSet::default(), + static_outbound: HashMap::default(), + }, + max_block_size: usize::MAX, } - cfgs + }); + let mut cfgs: Vec<_> = configs.collect(); + + let n = cfgs.len(); + for i in 0..n { + for j in 0..gossip_peers { + let j = (i + j + 1) % n; + let peer = cfgs[j].gossip.key.public(); + let addr = *cfgs[j].server_addr; + cfgs[i].gossip.static_outbound.insert(peer, addr); + } + } + cfgs +} + +/// Runner for Instance. +pub struct InstanceRunner { + state: Arc, + terminate: channel::Receiver<()>, + pipe: pipe::ActorPipe, +} + +impl InstanceRunner { + /// Runs the instance background processes. + pub async fn run(mut self, ctx: &ctx::Ctx) -> anyhow::Result<()> { + scope::run!(ctx, |ctx, s| async { + s.spawn_bg(crate::run_network(ctx, self.state, self.pipe)); + let _ = self.terminate.recv(ctx).await; + Ok(()) + }) + .await?; + drop(self.terminate); + Ok(()) } +} +impl Instance { /// Construct an instance for a given config. - pub(crate) fn from_cfg(cfg: Config) -> Self { + pub fn new(cfg: Config, block_store: Arc) -> (Self, InstanceRunner) { let (events_send, events_recv) = channel::unbounded(); - Self { - state: State::new(cfg, Some(events_send), None).expect("Invalid network config"), - events: events_recv, - } + let (actor_pipe, dispatcher_pipe) = pipe::new(); + let state = + State::new(cfg, block_store, Some(events_send)).expect("Invalid network config"); + let (terminate_send, terminate_recv) = channel::bounded(1); + ( + Self { + state: state.clone(), + events: events_recv, + pipe: dispatcher_pipe, + terminate: terminate_send, + }, + InstanceRunner { + state: state.clone(), + pipe: actor_pipe, + terminate: terminate_recv, + }, + ) + } + + /// Sends a termination signal to a node + /// and waits for it to terminate. + pub async fn terminate(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> { + let _ = self.terminate.try_send(()); + self.terminate.closed(ctx).await } - /// Constructs `n` node instances, configured to connect to each other. 
- /// Non-blocking (it doesn't run the network actors). - pub fn new(rng: &mut R, n: usize, gossip_peers: usize) -> Vec { - Self::new_configs(rng, n, gossip_peers) - .into_iter() - .map(Self::from_cfg) - .collect() + /// Pipe getter. + pub fn pipe( + &mut self, + ) -> &mut pipe::DispatcherPipe { + &mut self.pipe } /// State getter. @@ -122,32 +173,6 @@ impl Instance { &self.state.gossip.cfg } - /// Returns the overall config for this node. - pub fn to_config(&self) -> Config { - Config { - server_addr: self.state.cfg.server_addr, - validators: self.state.cfg.validators.clone(), - gossip: self.state.gossip.cfg.clone(), - consensus: self - .state - .consensus - .as_ref() - .map(|consensus_state| consensus_state.cfg.clone()), - } - } - - /// Sets a `SyncState` subscriber for the node. Panics if the node state is already shared. - pub fn set_sync_state_subscriber(&mut self, sync_state: watch::Receiver) { - let state = Arc::get_mut(&mut self.state).expect("node state is shared"); - state.gossip.sync_state = Some(sync_state); - } - - /// Disables ping messages over the gossip network. - pub fn disable_gossip_pings(&mut self) { - let state = Arc::get_mut(&mut self.state).expect("node state is shared"); - state.gossip.cfg.enable_pings = false; - } - /// Wait for static outbound gossip connections to be established. pub async fn wait_for_gossip_connections(&self) { let gossip_state = &self.state.gossip; @@ -178,6 +203,42 @@ impl Instance { .await .unwrap(); } + + /// Waits for node to be disconnected from the given gossip peer. + pub async fn wait_for_gossip_disconnect( + &self, + ctx: &ctx::Ctx, + peer: &node::PublicKey, + ) -> ctx::OrCanceled<()> { + let state = &self.state.gossip; + sync::wait_for(ctx, &mut state.inbound.subscribe(), |got| { + !got.current().contains(peer) + }) + .await?; + sync::wait_for(ctx, &mut state.outbound.subscribe(), |got| { + !got.current().contains(peer) + }) + .await?; + Ok(()) + } + + /// Waits for node to be disconnected from the given consensus peer. + pub async fn wait_for_consensus_disconnect( + &self, + ctx: &ctx::Ctx, + peer: &validator::PublicKey, + ) -> ctx::OrCanceled<()> { + let state = self.state.consensus.as_ref().unwrap(); + sync::wait_for(ctx, &mut state.inbound.subscribe(), |got| { + !got.current().contains(peer) + }) + .await?; + sync::wait_for(ctx, &mut state.outbound.subscribe(), |got| { + !got.current().contains(peer) + }) + .await?; + Ok(()) + } } /// Instantly broadcasts the ValidatorAddrs off-band @@ -217,16 +278,3 @@ pub async fn instant_network( tracing::info!("consensus network established"); Ok(()) } - -impl SyncState { - /// Generates a random state based on the provided RNG and with the specified `last_stored_block` - /// number. - pub(crate) fn gen(rng: &mut impl Rng, number: validator::BlockNumber) -> Self { - let mut this = Self { - first_stored_block: rng.gen(), - last_stored_block: rng.gen(), - }; - this.last_stored_block.message.proposal.number = number; - this - } -} diff --git a/node/actors/network/src/tests.rs b/node/actors/network/src/tests.rs index e6b5b220..75d10b1f 100644 --- a/node/actors/network/src/tests.rs +++ b/node/actors/network/src/tests.rs @@ -1,7 +1,8 @@ -use crate::{run_network, testonly}; +use crate::testonly; use tracing::Instrument as _; use zksync_concurrency::{ctx, scope, testonly::abort_on_panic}; -use zksync_consensus_utils::pipe; +use zksync_consensus_roles::validator; +use zksync_consensus_storage::testonly::new_store; /// Test that metrics are correctly defined /// (won't panic during registration). 
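For orientation, the test wiring that the hunks below converge on looks roughly like
this. This is a sketch distilled from the testonly APIs introduced above (`GenesisSetup`,
`new_configs`, `new_store`, `Instance`/`InstanceRunner`); error handling and exact
imports are elided, and the scope variable `s` is assumed to come from `scope::run!`:

    let setup = validator::testonly::GenesisSetup::new(rng, 3);
    let cfgs = testonly::new_configs(rng, &setup, 1);
    let (store, store_runner) = new_store(ctx, &setup.blocks[0]).await;
    s.spawn_bg(store_runner.run(ctx));
    for cfg in cfgs {
        // Instance::new returns a handle plus a runner for the background tasks.
        let (node, runner) = testonly::Instance::new(cfg, store.clone());
        s.spawn_bg(runner.run(ctx));
        // Drive `node.pipe()`, then `node.terminate(ctx).await?` to shut down.
    }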
@@ -10,15 +11,20 @@ async fn test_metrics() { abort_on_panic(); let ctx = &mut ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let nodes = testonly::Instance::new(rng, 3, 1); + let setup = validator::testonly::GenesisSetup::new(rng, 3); + let cfgs = testonly::new_configs(rng, &setup, 1); scope::run!(ctx, |ctx, s| async { - for (i, node) in nodes.iter().enumerate() { - let (network_pipe, _) = pipe::new(); - s.spawn_bg( - run_network(ctx, node.state.clone(), network_pipe) - .instrument(tracing::info_span!("node", i)), - ); - } + let (store, runner) = new_store(ctx, &setup.blocks[0]).await; + s.spawn_bg(runner.run(ctx)); + let nodes: Vec<_> = cfgs + .into_iter() + .enumerate() + .map(|(i, cfg)| { + let (node, runner) = testonly::Instance::new(cfg, store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + node + }) + .collect(); testonly::instant_network(ctx, nodes.iter()).await?; let registry = vise::MetricsCollection::default().collect(); diff --git a/node/actors/sync_blocks/src/lib.rs b/node/actors/sync_blocks/src/lib.rs index cda9050c..977ad696 100644 --- a/node/actors/sync_blocks/src/lib.rs +++ b/node/actors/sync_blocks/src/lib.rs @@ -4,9 +4,9 @@ //! network RPCs. use crate::io::{InputMessage, OutputMessage}; use std::sync::Arc; -use zksync_concurrency::{ctx, error::Wrap as _, scope}; -use zksync_consensus_network::io::{GetBlockError, SyncBlocksRequest}; -use zksync_consensus_storage::{BlockStore, BlockStoreState}; +use zksync_concurrency::{ctx, scope}; +use zksync_consensus_network::io::SyncBlocksRequest; +use zksync_consensus_storage::BlockStore; use zksync_consensus_utils::pipe::ActorPipe; mod config; @@ -36,32 +36,12 @@ impl Config { state, response, }) => { - let res = peer_states.update( - &peer, - BlockStoreState { - first: state.first_stored_block, - last: state.last_stored_block, - }, - ); + let res = peer_states.update(&peer, state); if let Err(err) = res { tracing::info!(%err, ?peer, "peer_states.update()"); } response.send(()).ok(); } - InputMessage::Network(SyncBlocksRequest::GetBlock { - block_number, - response, - }) => { - response - .send( - storage - .block(ctx, block_number) - .await - .wrap("storage.block()")? - .ok_or(GetBlockError::NotSynced), - ) - .ok(); - } } } }) diff --git a/node/actors/sync_blocks/src/peers/tests/basics.rs b/node/actors/sync_blocks/src/peers/tests/basics.rs index dea7e712..4ccda284 100644 --- a/node/actors/sync_blocks/src/peers/tests/basics.rs +++ b/node/actors/sync_blocks/src/peers/tests/basics.rs @@ -1,7 +1,10 @@ //! Basic tests. use super::*; -use crate::{io, tests::wait_for_stored_block}; +use crate::{ + io, + tests::{send_block, sync_state}, +}; #[derive(Debug)] struct UpdatingPeerStateWithSingleBlock; @@ -12,7 +15,7 @@ impl Test for UpdatingPeerStateWithSingleBlock { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -23,7 +26,7 @@ impl Test for UpdatingPeerStateWithSingleBlock { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(1)) + .update(&peer_key, sync_state(&setup, 1)) .unwrap(); // Check that the actor has sent a `get_block` request to the peer @@ -37,16 +40,13 @@ impl Test for UpdatingPeerStateWithSingleBlock { assert_eq!(number, BlockNumber(1)); // Emulate the peer sending a correct response. 
- test_validators.send_block(BlockNumber(1), response); + send_block(&setup, BlockNumber(1), response); let peer_event = events_receiver.recv(ctx).await?; assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(1))); // Check that the block has been saved locally. - sync::wait_for(ctx, &mut storage.subscribe(), |state| { - state.contains(BlockNumber(1)) - }) - .await?; + storage.wait_until_persisted(ctx, BlockNumber(1)).await?; Ok(()) } } @@ -65,7 +65,7 @@ impl Test for CancelingBlockRetrieval { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -75,7 +75,7 @@ impl Test for CancelingBlockRetrieval { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(1)) + .update(&peer_key, sync_state(&setup, 1)) .unwrap(); // Check that the actor has sent a `get_block` request to the peer @@ -83,9 +83,7 @@ impl Test for CancelingBlockRetrieval { message_receiver.recv(ctx).await?; // Emulate receiving block using external means. - storage - .queue_block(ctx, test_validators.final_blocks[1].clone()) - .await?; + storage.queue_block(ctx, setup.blocks[1].clone()).await?; // Retrieval of the block must be canceled. response.closed().await; @@ -107,7 +105,7 @@ impl Test for FilteringBlockRetrieval { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -115,14 +113,12 @@ impl Test for FilteringBlockRetrieval { } = handles; // Emulate receiving block using external means. - storage - .queue_block(ctx, test_validators.final_blocks[1].clone()) - .await?; + storage.queue_block(ctx, setup.blocks[1].clone()).await?; let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(2)) + .update(&peer_key, sync_state(&setup, 2)) .unwrap(); // Check that the actor has sent `get_block` request to the peer, but only for block #2. @@ -163,7 +159,7 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -173,10 +169,7 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update( - &peer_key, - test_validators.sync_state(Self::BLOCK_COUNT - 1).clone(), - ) + .update(&peer_key, sync_state(&setup, Self::BLOCK_COUNT - 1).clone()) .unwrap(); let mut requested_blocks = HashMap::with_capacity(Self::MAX_CONCURRENT_BLOCKS); @@ -198,7 +191,7 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { // Answer a random request. let number = *requested_blocks.keys().choose(rng).unwrap(); let response = requested_blocks.remove(&number).unwrap(); - test_validators.send_block(number, response); + send_block(&setup, number, response); let peer_event = events_receiver.recv(ctx).await?; assert_matches!(peer_event, PeerStateEvent::GotBlock(got) if got == number); @@ -208,13 +201,15 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { // Answer all remaining requests. 
for (number, response) in requested_blocks { - test_validators.send_block(number, response); + send_block(&setup, number, response); let peer_event = events_receiver.recv(ctx).await?; assert_matches!(peer_event, PeerStateEvent::GotBlock(got) if got == number); } let expected_block_number = BlockNumber(Self::BLOCK_COUNT as u64 - 1); - wait_for_stored_block(ctx, storage.as_ref(), expected_block_number).await?; + storage + .wait_until_persisted(ctx, expected_block_number) + .await?; Ok(()) } } @@ -238,7 +233,7 @@ impl Test for DisconnectingPeer { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -248,7 +243,7 @@ impl Test for DisconnectingPeer { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(1)) + .update(&peer_key, sync_state(&setup, 1)) .unwrap(); // Drop the response sender emulating peer disconnect. @@ -278,7 +273,7 @@ impl Test for DisconnectingPeer { // Re-connect the peer with an updated state. peer_states - .update(&peer_key, test_validators.sync_state(2)) + .update(&peer_key, sync_state(&setup, 2)) .unwrap(); // Ensure that blocks are re-requested. clock.advance(BLOCK_SLEEP_INTERVAL); @@ -299,7 +294,7 @@ impl Test for DisconnectingPeer { assert!(responses.contains_key(&2)); // Send one of the responses and drop the other request. let response = responses.remove(&2).unwrap(); - test_validators.send_block(BlockNumber(2), response); + send_block(&setup, BlockNumber(2), response); wait_for_event(ctx, &mut events_receiver, |ev| { matches!(ev, PeerStateEvent::GotBlock(BlockNumber(2))) @@ -319,7 +314,7 @@ impl Test for DisconnectingPeer { // Re-connect the peer with the same state. 
peer_states - .update(&peer_key, test_validators.sync_state(2)) + .update(&peer_key, sync_state(&setup, 2)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); @@ -331,7 +326,7 @@ impl Test for DisconnectingPeer { }) = message; assert_eq!(recipient, peer_key); assert_eq!(number, BlockNumber(1)); - test_validators.send_block(BlockNumber(1), response); + send_block(&setup, number, response); let peer_event = events_receiver.recv(ctx).await?; assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(1))); @@ -340,7 +335,7 @@ impl Test for DisconnectingPeer { clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(message_receiver.try_recv(), None); - wait_for_stored_block(ctx, storage.as_ref(), BlockNumber(2)).await?; + storage.wait_until_persisted(ctx, BlockNumber(2)).await?; Ok(()) } } @@ -382,7 +377,7 @@ impl Test for DownloadingBlocksInGaps { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -391,9 +386,7 @@ impl Test for DownloadingBlocksInGaps { scope::run!(ctx, |ctx, s| async { for &block_number in &self.local_block_numbers { - s.spawn( - storage.queue_block(ctx, test_validators.final_blocks[block_number].clone()), - ); + s.spawn(storage.queue_block(ctx, setup.blocks[block_number].clone())); } let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); @@ -403,10 +396,7 @@ impl Test for DownloadingBlocksInGaps { Self::BLOCK_COUNT - 1 }; peer_states - .update( - &peer_key, - test_validators.sync_state(last_peer_block_number), - ) + .update(&peer_key, sync_state(&setup, last_peer_block_number)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); @@ -418,10 +408,7 @@ impl Test for DownloadingBlocksInGaps { if expected_number > last_peer_block_number { last_peer_block_number = rng.gen_range(expected_number..Self::BLOCK_COUNT); peer_states - .update( - &peer_key, - test_validators.sync_state(last_peer_block_number), - ) + .update(&peer_key, sync_state(&setup, last_peer_block_number)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); } @@ -434,8 +421,8 @@ impl Test for DownloadingBlocksInGaps { assert_eq!(recipient, peer_key); assert!(number.0 <= last_peer_block_number as u64); - test_validators.send_block(number, response); - wait_for_stored_block(ctx, storage.as_ref(), number).await?; + send_block(&setup, number, response); + storage.wait_until_persisted(ctx, number).await?; clock.advance(BLOCK_SLEEP_INTERVAL); } Ok(()) @@ -471,7 +458,7 @@ impl Test for LimitingGetBlockConcurrency { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -480,7 +467,7 @@ impl Test for LimitingGetBlockConcurrency { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(Self::BLOCK_COUNT - 1)) + .update(&peer_key, sync_state(&setup, Self::BLOCK_COUNT - 1)) .unwrap(); // The actor should request 3 new blocks it's now aware of from the only peer it's currently @@ -504,8 +491,8 @@ impl Test for LimitingGetBlockConcurrency { // Send a correct response. let response = message_responses.remove(&1).unwrap(); - test_validators.send_block(BlockNumber(1), response); - wait_for_stored_block(ctx, storage.as_ref(), BlockNumber(1)).await?; + send_block(&setup, BlockNumber(1), response); + storage.wait_until_persisted(ctx, BlockNumber(1)).await?; // The actor should now request another block. 
let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { diff --git a/node/actors/sync_blocks/src/peers/tests/fakes.rs b/node/actors/sync_blocks/src/peers/tests/fakes.rs index e0b5cdd6..bc8e5da1 100644 --- a/node/actors/sync_blocks/src/peers/tests/fakes.rs +++ b/node/actors/sync_blocks/src/peers/tests/fakes.rs @@ -1,29 +1,33 @@ //! Tests focused on handling peers providing fake information to the node. use super::*; -use zksync_consensus_roles::validator; +use crate::tests::sync_state; +use zksync_consensus_roles::{validator, validator::testonly::GenesisSetup}; +use zksync_consensus_storage::testonly::new_store; #[tokio::test] async fn processing_invalid_sync_states() { let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let test_validators = TestValidators::new(rng, 4, 3); - let (storage, _runner) = make_store(ctx, test_validators.final_blocks[0].clone()).await; + let mut setup = GenesisSetup::empty(rng, 4); + setup.push_blocks(rng, 3); + let (storage, _runner) = new_store(ctx, &setup.blocks[0]).await; let (message_sender, _) = channel::unbounded(); - let peer_states = PeerStates::new(test_validators.test_config(), storage, message_sender); + let peer_states = PeerStates::new(test_config(&setup), storage, message_sender); let peer = &rng.gen::().public(); - let mut invalid_sync_state = test_validators.sync_state(1); - invalid_sync_state.first = test_validators.final_blocks[2].justification.clone(); + let mut invalid_sync_state = sync_state(&setup, 1); + invalid_sync_state.first = setup.blocks[2].justification.clone(); assert!(peer_states.update(peer, invalid_sync_state).is_err()); - let mut invalid_sync_state = test_validators.sync_state(1); + let mut invalid_sync_state = sync_state(&setup, 1); invalid_sync_state.last.message.proposal.number = BlockNumber(5); assert!(peer_states.update(peer, invalid_sync_state).is_err()); - let other_network = TestValidators::new(rng, 4, 2); - let invalid_sync_state = other_network.sync_state(1); + let mut other_network = GenesisSetup::empty(rng, 4); + other_network.push_blocks(rng, 2); + let invalid_sync_state = sync_state(&other_network, 1); assert!(peer_states.update(peer, invalid_sync_state).is_err()); } @@ -37,7 +41,7 @@ impl Test for PeerWithFakeSyncState { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - test_validators, + setup, peer_states, mut events_receiver, .. 
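The `sync_state` helper that these tests call is defined in `tests/mod.rs` at the end of
this patch; it delegates to `snapshot_sync_state`, which, judging by the `TestValidators`
implementation it replaces, builds the announced `BlockStoreState` out of the
justifications of the pre-generated blocks. A sketch of the assumed shape:

    pub(crate) fn snapshot_sync_state(
        setup: &GenesisSetup,
        range: ops::RangeInclusive<usize>,
    ) -> BlockStoreState {
        assert!(!range.is_empty());
        BlockStoreState {
            // The state is a pair of CommitQCs delimiting the stored range.
            first: setup.blocks[*range.start()].justification.clone(),
            last: setup.blocks[*range.end()].justification.clone(),
        }
    }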
@@ -45,7 +49,7 @@ impl Test for PeerWithFakeSyncState { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - let mut fake_sync_state = test_validators.sync_state(1); + let mut fake_sync_state = sync_state(&setup, 1); fake_sync_state.last.message.proposal.number = BlockNumber(42); assert!(peer_states.update(&peer_key, fake_sync_state).is_err()); @@ -74,7 +78,7 @@ impl Test for PeerWithFakeBlock { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -85,19 +89,23 @@ impl Test for PeerWithFakeBlock { for fake_block in [ // other block than requested - test_validators.final_blocks[0].clone(), + setup.blocks[0].clone(), // block with wrong validator set - TestValidators::new(rng, 4, 2).final_blocks[1].clone(), + { + let mut setup = GenesisSetup::empty(rng, 4); + setup.push_blocks(rng, 2); + setup.blocks[1].clone() + }, // block with mismatching payload, { - let mut block = test_validators.final_blocks[1].clone(); + let mut block = setup.blocks[1].clone(); block.payload = validator::Payload(b"invalid".to_vec()); block }, ] { let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(1)) + .update(&peer_key, sync_state(&setup, 1)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); diff --git a/node/actors/sync_blocks/src/peers/tests/mod.rs b/node/actors/sync_blocks/src/peers/tests/mod.rs index 3bdab70d..6349c408 100644 --- a/node/actors/sync_blocks/src/peers/tests/mod.rs +++ b/node/actors/sync_blocks/src/peers/tests/mod.rs @@ -1,12 +1,17 @@ use super::*; -use crate::tests::{make_store, TestValidators}; +use crate::tests::test_config; use assert_matches::assert_matches; use async_trait::async_trait; use rand::{seq::IteratorRandom, Rng}; use std::{collections::HashSet, fmt}; use test_casing::{test_casing, Product}; use tracing::instrument; -use zksync_concurrency::{testonly::abort_on_panic, time}; +use zksync_concurrency::{ + testonly::{abort_on_panic, set_timeout}, + time, +}; +use zksync_consensus_roles::validator; +use zksync_consensus_storage::testonly::new_store; mod basics; mod fakes; @@ -28,7 +33,7 @@ async fn wait_for_event( #[derive(Debug)] struct TestHandles { clock: ctx::ManualClock, - test_validators: TestValidators, + setup: validator::testonly::GenesisSetup, peer_states: Arc, storage: Arc, message_receiver: channel::UnboundedReceiver, @@ -48,7 +53,7 @@ trait Test: fmt::Debug + Send + Sync { &self, _ctx: &ctx::Ctx, _storage: &BlockStore, - _test_validators: &TestValidators, + _setup: &validator::testonly::GenesisSetup, ) { // Does nothing by default } @@ -59,29 +64,26 @@ trait Test: fmt::Debug + Send + Sync { #[instrument(level = "trace")] async fn test_peer_states(test: T) { abort_on_panic(); + let _guard = set_timeout(TEST_TIMEOUT); - let ctx = &ctx::test_root(&ctx::RealClock).with_timeout(TEST_TIMEOUT); let clock = ctx::ManualClock::new(); - let ctx = &ctx::test_with_clock(ctx, &clock); - let test_validators = TestValidators::new(&mut ctx.rng(), 4, T::BLOCK_COUNT); - let (store, store_run) = make_store( - ctx, - test_validators.final_blocks[T::GENESIS_BLOCK_NUMBER].clone(), - ) - .await; - test.initialize_storage(ctx, store.as_ref(), &test_validators) - .await; + let ctx = &ctx::test_root(&clock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::GenesisSetup::new(rng, 4); + setup.push_blocks(rng, T::BLOCK_COUNT); + let (store, store_run) = new_store(ctx, 
&setup.blocks[T::GENESIS_BLOCK_NUMBER]).await;
+    test.initialize_storage(ctx, store.as_ref(), &setup).await;
 
     let (message_sender, message_receiver) = channel::unbounded();
     let (events_sender, events_receiver) = channel::unbounded();
-    let mut config = test_validators.test_config();
+    let mut config = test_config(&setup);
     test.tweak_config(&mut config);
     let mut peer_states = PeerStates::new(config, store.clone(), message_sender);
     peer_states.events_sender = Some(events_sender);
     let peer_states = Arc::new(peer_states);
     let test_handles = TestHandles {
         clock,
-        test_validators,
+        setup,
         peer_states: peer_states.clone(),
         storage: store.clone(),
         message_receiver,
diff --git a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs b/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs
index f665c5a5..dcc21ea5 100644
--- a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs
+++ b/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs
@@ -1,7 +1,7 @@
 //! Tests focused on interaction with multiple peers.
 
 use super::*;
-use crate::tests::wait_for_stored_block;
+use crate::tests::{send_block, sync_state};
 
 #[derive(Debug)]
 struct RequestingBlocksFromTwoPeers;
@@ -19,7 +19,7 @@ impl Test for RequestingBlocksFromTwoPeers {
     async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> {
         let TestHandles {
             clock,
-            test_validators,
+            setup,
             peer_states,
             storage,
             mut message_receiver,
@@ -29,7 +29,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         let rng = &mut ctx.rng();
         let first_peer = rng.gen::<node::SecretKey>().public();
         peer_states
-            .update(&first_peer, test_validators.sync_state(2))
+            .update(&first_peer, sync_state(&setup, 2))
             .unwrap();
 
         let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock {
@@ -45,7 +45,7 @@ impl Test for RequestingBlocksFromTwoPeers {
 
         let second_peer = rng.gen::<node::SecretKey>().public();
         peer_states
-            .update(&second_peer, test_validators.sync_state(4))
+            .update(&second_peer, sync_state(&setup, 4))
             .unwrap();
         clock.advance(BLOCK_SLEEP_INTERVAL);
 
@@ -61,7 +61,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         );
         tracing::info!(%second_peer_block_number, "received request");
 
-        test_validators.send_block(first_peer_block_number, first_peer_response);
+        send_block(&setup, first_peer_block_number, first_peer_response);
         wait_for_event(
             ctx,
             &mut events_receiver,
@@ -75,7 +75,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         assert_matches!(message_receiver.try_recv(), None);
 
         peer_states
-            .update(&first_peer, test_validators.sync_state(4))
+            .update(&first_peer, sync_state(&setup, 4))
             .unwrap();
         clock.advance(BLOCK_SLEEP_INTERVAL);
         // Now the actor can get block #3 from the peer.
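`send_block(&setup, number, response)` is the free-function successor of
`TestValidators::send_block`, whose old body is visible near the bottom of this patch.
Assuming it keeps the same behavior (the new definition is not shown here, and the
response-channel type is taken from `network::io` as before), it answers a pending
`get_block` request with the canned block from the setup:

    pub(crate) fn send_block(
        setup: &GenesisSetup,
        number: BlockNumber,
        response: oneshot::Sender<GetBlockResponse>,
    ) {
        let block = setup.blocks[number.0 as usize].clone();
        // The send fails only if the requesting side already hung up;
        // tests treat that as a non-event.
        let _ = response.send(Ok(block));
    }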
@@ -91,7 +91,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         );
         tracing::info!(%first_peer_block_number, "received request");
 
-        test_validators.send_block(first_peer_block_number, first_peer_response);
+        send_block(&setup, first_peer_block_number, first_peer_response);
         wait_for_event(
             ctx,
             &mut events_receiver,
@@ -112,7 +112,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         );
         tracing::info!(%first_peer_block_number, "received request");
 
-        test_validators.send_block(second_peer_block_number, second_peer_response);
+        send_block(&setup, second_peer_block_number, second_peer_response);
         wait_for_event(
             ctx,
             &mut events_receiver,
@@ -120,7 +120,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         )
         .await
         .unwrap();
-        test_validators.send_block(first_peer_block_number, first_peer_response);
+        send_block(&setup, first_peer_block_number, first_peer_response);
         wait_for_event(
             ctx,
             &mut events_receiver,
@@ -132,7 +132,7 @@ impl Test for RequestingBlocksFromTwoPeers {
         clock.advance(BLOCK_SLEEP_INTERVAL);
         assert_matches!(message_receiver.try_recv(), None);
 
-        wait_for_stored_block(ctx, storage.as_ref(), BlockNumber(4)).await?;
+        storage.wait_until_persisted(ctx, BlockNumber(4)).await?;
         Ok(())
     }
 }
@@ -202,7 +202,7 @@ impl Test for RequestingBlocksFromMultiplePeers {
     async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> {
         let TestHandles {
             clock,
-            test_validators,
+            setup,
             peer_states,
             storage,
             mut message_receiver,
@@ -216,7 +216,7 @@ impl Test for RequestingBlocksFromMultiplePeers {
             // Announce peer states.
             for (peer_key, peer) in peers {
                 let last_block = peer.last_block.0 as usize;
-                peer_states.update(peer_key, test_validators.sync_state(last_block)).unwrap();
+                peer_states.update(peer_key, sync_state(&setup, last_block)).unwrap();
             }
 
             s.spawn_bg(async {
@@ -251,7 +251,7 @@ impl Test for RequestingBlocksFromMultiplePeers {
                         // Peer is at capacity, respond to a random request in order to progress
                         let idx = rng.gen_range(0..peer_responses.len());
                         let (number, response) = peer_responses.remove(idx);
-                        test_validators.send_block(number, response);
+                        send_block(&setup, number, response);
                     }
 
                     // Respond to some other random requests.
@@ -262,14 +262,14 @@ impl Test for RequestingBlocksFromMultiplePeers {
                             continue;
                         }
                         let (number, response) = peer_responses.remove(idx);
-                        test_validators.send_block(number, response);
+                        send_block(&setup, number, response);
                     }
                 }
             }
 
             // Answer all remaining requests.
             for (number, response) in responses_by_peer.into_values().flatten() {
-                test_validators.send_block(number, response);
+                send_block(&setup, number, response);
             }
             Ok(())
         });
@@ -291,7 +291,7 @@ impl Test for RequestingBlocksFromMultiplePeers {
             }
         }
 
-        wait_for_stored_block(ctx, storage.as_ref(), BlockNumber(19)).await?;
+        storage.wait_until_persisted(ctx, BlockNumber(19)).await?;
         Ok(())
     })
     .await
diff --git a/node/actors/sync_blocks/src/peers/tests/snapshots.rs b/node/actors/sync_blocks/src/peers/tests/snapshots.rs
index e92db55e..740e6412 100644
--- a/node/actors/sync_blocks/src/peers/tests/snapshots.rs
+++ b/node/actors/sync_blocks/src/peers/tests/snapshots.rs
@@ -1,7 +1,7 @@
 //! Tests related to snapshot storage.
use super::*; -use crate::tests::wait_for_stored_block; +use crate::tests::{send_block, snapshot_sync_state, sync_state}; use zksync_consensus_network::io::GetBlockError; #[derive(Debug)] @@ -18,7 +18,7 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, storage, mut message_receiver, @@ -29,7 +29,7 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { let peer_key = rng.gen::().public(); for stale_block_number in [1, 2] { peer_states - .update(&peer_key, test_validators.sync_state(stale_block_number)) + .update(&peer_key, sync_state(&setup, stale_block_number)) .unwrap(); // No new block requests should be issued. @@ -39,7 +39,7 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { } peer_states - .update(&peer_key, test_validators.sync_state(3)) + .update(&peer_key, sync_state(&setup, 3)) .unwrap(); // Check that the actor has sent a `get_block` request to the peer @@ -53,7 +53,7 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { assert_eq!(number, BlockNumber(3)); // Emulate the peer sending a correct response. - test_validators.send_block(BlockNumber(3), response); + send_block(&setup, BlockNumber(3), response); wait_for_event(ctx, &mut events_receiver, |ev| { matches!(ev, PeerStateEvent::GotBlock(BlockNumber(3))) @@ -62,7 +62,7 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { .unwrap(); // Check that the block has been saved locally. - wait_for_stored_block(ctx, &storage, BlockNumber(3)).await?; + storage.wait_until_queued(ctx, BlockNumber(3)).await?; Ok(()) } } @@ -85,7 +85,7 @@ impl Test for FilteringRequestsForSnapshotPeer { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, mut message_receiver, mut events_receiver, @@ -96,7 +96,7 @@ impl Test for FilteringRequestsForSnapshotPeer { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.snapshot_sync_state(2..=2)) + .update(&peer_key, snapshot_sync_state(&setup, 2..=2)) .unwrap(); // The peer should only be queried for blocks that it actually has (#2 in this case). @@ -110,7 +110,7 @@ impl Test for FilteringRequestsForSnapshotPeer { assert_eq!(number, BlockNumber(2)); // Emulate the peer sending a correct response. - test_validators.send_block(BlockNumber(2), response); + send_block(&setup, BlockNumber(2), response); wait_for_event(ctx, &mut events_receiver, |ev| { matches!(ev, PeerStateEvent::GotBlock(BlockNumber(2))) }) @@ -124,7 +124,7 @@ impl Test for FilteringRequestsForSnapshotPeer { // Emulate peer receiving / producing a new block. peer_states - .update(&peer_key, test_validators.snapshot_sync_state(2..=3)) + .update(&peer_key, snapshot_sync_state(&setup, 2..=3)) .unwrap(); let message = message_receiver.recv(ctx).await?; @@ -139,7 +139,7 @@ impl Test for FilteringRequestsForSnapshotPeer { // Emulate another peer with full history. 
let full_peer_key = rng.gen::().public(); peer_states - .update(&full_peer_key, test_validators.sync_state(3)) + .update(&full_peer_key, sync_state(&setup, 3)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); @@ -154,7 +154,7 @@ impl Test for FilteringRequestsForSnapshotPeer { assert_eq!(recipient, full_peer_key); assert_eq!(number, BlockNumber(1)); - test_validators.send_block(BlockNumber(1), response); + send_block(&setup, BlockNumber(1), response); wait_for_event(ctx, &mut events_receiver, |ev| { matches!(ev, PeerStateEvent::GotBlock(BlockNumber(1))) }) @@ -201,7 +201,7 @@ impl Test for PruningPeerHistory { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, mut message_receiver, mut events_receiver, @@ -212,7 +212,7 @@ impl Test for PruningPeerHistory { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.sync_state(1)) + .update(&peer_key, sync_state(&setup, 1)) .unwrap(); let message = message_receiver.recv(ctx).await?; @@ -226,7 +226,7 @@ impl Test for PruningPeerHistory { // Emulate peer pruning blocks. peer_states - .update(&peer_key, test_validators.snapshot_sync_state(3..=3)) + .update(&peer_key, snapshot_sync_state(&setup, 3..=3)) .unwrap(); let message = message_receiver.recv(ctx).await?; @@ -238,7 +238,7 @@ impl Test for PruningPeerHistory { assert_eq!(recipient, peer_key); assert_eq!(number, BlockNumber(3)); - test_validators.send_block(BlockNumber(3), response); + send_block(&setup, BlockNumber(3), response); wait_for_event(ctx, &mut events_receiver, |ev| { matches!(ev, PeerStateEvent::GotBlock(BlockNumber(3))) }) @@ -250,7 +250,9 @@ impl Test for PruningPeerHistory { sync::yield_now().await; assert!(message_receiver.try_recv().is_none()); - block1_response.send(Err(GetBlockError::NotSynced)).unwrap(); + block1_response + .send(Err(GetBlockError::NotAvailable)) + .unwrap(); // Block #1 should not be requested again (the peer no longer has it). 
clock.advance(BLOCK_SLEEP_INTERVAL); sync::yield_now().await; @@ -278,7 +280,7 @@ impl Test for BackfillingPeerHistory { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - test_validators, + setup, peer_states, mut message_receiver, clock, @@ -288,7 +290,7 @@ impl Test for BackfillingPeerHistory { let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); peer_states - .update(&peer_key, test_validators.snapshot_sync_state(3..=3)) + .update(&peer_key, snapshot_sync_state(&setup, 3..=3)) .unwrap(); let message = message_receiver.recv(ctx).await?; @@ -299,7 +301,7 @@ impl Test for BackfillingPeerHistory { assert_eq!(number, BlockNumber(3)); peer_states - .update(&peer_key, test_validators.sync_state(3)) + .update(&peer_key, sync_state(&setup, 3)) .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); let mut new_requested_numbers = HashSet::new(); diff --git a/node/actors/sync_blocks/src/tests/end_to_end.rs b/node/actors/sync_blocks/src/tests/end_to_end.rs index 283798ff..96f254b1 100644 --- a/node/actors/sync_blocks/src/tests/end_to_end.rs +++ b/node/actors/sync_blocks/src/tests/end_to_end.rs @@ -6,10 +6,12 @@ use rand::seq::SliceRandom; use std::fmt; use test_casing::test_casing; use tracing::{instrument, Instrument}; -use zksync_concurrency::{ctx, scope, sync, testonly::abort_on_panic}; +use zksync_concurrency::{ + ctx, scope, + testonly::{abort_on_panic, set_timeout}, +}; use zksync_consensus_network as network; -use zksync_consensus_network::{io::SyncState, testonly::Instance as NetworkInstance}; -use zksync_consensus_roles::node; +use zksync_consensus_storage::testonly::new_store; use zksync_consensus_utils::no_copy::NoCopy; type NetworkDispatcherPipe = @@ -18,7 +20,7 @@ type NetworkDispatcherPipe = #[derive(Debug)] struct Node { store: Arc, - test_validators: Arc, + setup: Arc, switch_on_sender: Option>, _switch_off_sender: oneshot::Sender<()>, } @@ -30,11 +32,14 @@ impl Node { gossip_peers: usize, ) -> (Vec, Vec) { let rng = &mut ctx.rng(); - let test_validators = Arc::new(TestValidators::new(rng, 4, 20)); + // NOTE: originally there were only 4 consensus nodes. 
+ let mut setup = validator::testonly::GenesisSetup::new(rng, node_count); + setup.push_blocks(rng, 20); + let setup = Arc::new(setup); let mut nodes = vec![]; let mut runners = vec![]; - for net in NetworkInstance::new(rng, node_count, gossip_peers) { - let (n, r) = Node::new(ctx, net, test_validators.clone()).await; + for net in network::testonly::new_configs(rng, &setup, gossip_peers) { + let (n, r) = Node::new(ctx, net, setup.clone()).await; nodes.push(n); runners.push(r); } @@ -43,26 +48,24 @@ impl Node { async fn new( ctx: &ctx::Ctx, - mut network: NetworkInstance, - test_validators: Arc, + network: network::Config, + setup: Arc, ) -> (Self, NodeRunner) { - let (store, store_runner) = make_store(ctx, test_validators.final_blocks[0].clone()).await; + let (store, store_runner) = new_store(ctx, &setup.blocks[0]).await; let (switch_on_sender, switch_on_receiver) = oneshot::channel(); let (switch_off_sender, switch_off_receiver) = oneshot::channel(); - network.disable_gossip_pings(); - let runner = NodeRunner { network, store: store.clone(), store_runner, - test_validators: test_validators.clone(), + setup: setup.clone(), switch_on_receiver, switch_off_receiver, }; let this = Self { store, - test_validators, + setup, switch_on_sender: Some(switch_on_sender), _switch_off_sender: switch_off_sender, }; @@ -75,88 +78,52 @@ impl Node { async fn put_block(&self, ctx: &ctx::Ctx, block_number: BlockNumber) { tracing::trace!(%block_number, "Storing new block"); - let block = &self.test_validators.final_blocks[block_number.0 as usize]; + let block = &self.setup.blocks[block_number.0 as usize]; self.store.queue_block(ctx, block.clone()).await.unwrap(); } } #[must_use] struct NodeRunner { - network: NetworkInstance, + network: network::Config, store: Arc, store_runner: BlockStoreRunner, - test_validators: Arc, + setup: Arc, switch_on_receiver: oneshot::Receiver<()>, switch_off_receiver: oneshot::Receiver<()>, } -impl fmt::Debug for NodeRunner { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter - .debug_struct("NodeRunner") - .field("key", &self.key()) - .finish() - } -} - -fn to_sync_state(state: BlockStoreState) -> SyncState { - SyncState { - first_stored_block: state.first, - last_stored_block: state.last, - } -} - impl NodeRunner { - fn key(&self) -> node::PublicKey { - self.network.gossip_config().key.public() - } - - async fn run(mut self, ctx: &ctx::Ctx) -> anyhow::Result<()> { - let key = self.key(); + async fn run(self, ctx: &ctx::Ctx) -> anyhow::Result<()> { + tracing::info!("NodeRunner::run()"); + let key = self.network.gossip.key.public(); let (sync_blocks_actor_pipe, sync_blocks_dispatcher_pipe) = pipe::new(); - let (network_actor_pipe, network_dispatcher_pipe) = pipe::new(); - let mut store_state = self.store.subscribe(); - let sync_state = sync::watch::channel(to_sync_state(store_state.borrow().clone())).0; - self.network - .set_sync_state_subscriber(sync_state.subscribe()); - - let sync_blocks_config = self.test_validators.test_config(); + let (mut network, network_runner) = + network::testonly::Instance::new(self.network.clone(), self.store.clone()); + let sync_blocks_config = test_config(&self.setup); scope::run!(ctx, |ctx, s| async { s.spawn_bg(self.store_runner.run(ctx)); - s.spawn_bg(async { - while let Ok(state) = sync::changed(ctx, &mut store_state).await { - sync_state.send_replace(to_sync_state(state.clone())); - } - Ok(()) - }); - s.spawn_bg(async { - network::run_network(ctx, self.network.state().clone(), network_actor_pipe) - 
.instrument(tracing::trace_span!("network", ?key)) - .await - .with_context(|| format!("network for {key:?}")) - }); - self.network.wait_for_gossip_connections().await; - tracing::trace!("Node connected to peers"); + s.spawn_bg(network_runner.run(ctx)); + network.wait_for_gossip_connections().await; + tracing::info!("Node connected to peers"); self.switch_on_receiver .recv_or_disconnected(ctx) .await? .ok(); - s.spawn_bg(async { - Self::run_executor(ctx, sync_blocks_dispatcher_pipe, network_dispatcher_pipe) - .instrument(tracing::trace_span!("mock_executor", ?key)) - .await - .with_context(|| format!("executor for {key:?}")) - }); + tracing::info!("switch_on"); + s.spawn_bg( + async { + Self::run_executor(ctx, sync_blocks_dispatcher_pipe, network.pipe()) + .await + .with_context(|| format!("executor for {key:?}")) + } + .instrument(tracing::info_span!("mock_executor", ?key)), + ); s.spawn_bg(sync_blocks_config.run(ctx, sync_blocks_actor_pipe, self.store.clone())); tracing::info!("Node is fully started"); - self.switch_off_receiver - .recv_or_disconnected(ctx) - .await - .ok(); - // ^ Unlike with `switch_on_receiver`, the context may get canceled before the receiver - // is dropped, so we swallow both cancellation and disconnect errors here. + let _ = self.switch_off_receiver.recv_or_disconnected(ctx).await; tracing::info!("Node stopped"); Ok(()) }) @@ -166,7 +133,7 @@ impl NodeRunner { async fn run_executor( ctx: &ctx::Ctx, mut sync_blocks_dispatcher_pipe: pipe::DispatcherPipe, - mut network_dispatcher_pipe: NetworkDispatcherPipe, + network_dispatcher_pipe: &mut NetworkDispatcherPipe, ) -> anyhow::Result<()> { scope::run!(ctx, |ctx, s| async { s.spawn(async { @@ -202,12 +169,9 @@ trait GossipNetworkTest: fmt::Debug + Send { #[instrument(level = "trace")] async fn test_sync_blocks(test: T) { - const CLOCK_SPEEDUP: u32 = 25; - abort_on_panic(); - - let ctx = &ctx::test_root(&ctx::AffineClock::new(CLOCK_SPEEDUP as f64)) - .with_timeout(TEST_TIMEOUT * CLOCK_SPEEDUP); + let _guard = set_timeout(TEST_TIMEOUT); + let ctx = &ctx::test_root(&ctx::AffineClock::new(25.)); let (node_count, gossip_peers) = test.network_params(); let (nodes, runners) = Node::new_network(ctx, node_count, gossip_peers).await; scope::run!(ctx, |ctx, s| async { @@ -235,7 +199,7 @@ impl GossipNetworkTest for BasicSynchronization { async fn test(self, ctx: &ctx::Ctx, mut node_handles: Vec) -> anyhow::Result<()> { let rng = &mut ctx.rng(); - // Check initial node states. + tracing::info!("Check initial node states"); for node_handle in &mut node_handles { node_handle.switch_on(); let state = node_handle.store.subscribe().borrow().clone(); @@ -247,11 +211,14 @@ impl GossipNetworkTest for BasicSynchronization { let sending_node = node_handles.choose(rng).unwrap(); sending_node.put_block(ctx, block_number).await; - // Wait until all nodes get this block. + tracing::info!("Wait until all nodes get block #{block_number}"); for node_handle in &mut node_handles { - wait_for_stored_block(ctx, &node_handle.store, block_number).await?; + node_handle + .store + .wait_until_persisted(ctx, block_number) + .await?; + tracing::info!("OK"); } - tracing::trace!("All nodes received block #{block_number}"); } let sending_node = node_handles.choose(rng).unwrap(); @@ -267,7 +234,10 @@ impl GossipNetworkTest for BasicSynchronization { // Wait until nodes get all new blocks. 
for node_handle in &node_handles { - wait_for_stored_block(ctx, &node_handle.store, BlockNumber(9)).await?; + node_handle + .store + .wait_until_persisted(ctx, BlockNumber(9)) + .await?; } Ok(()) }) @@ -324,7 +294,10 @@ impl GossipNetworkTest for SwitchingOffNodes { // Wait until all remaining nodes get the new block. for node_handle in &node_handles { - wait_for_stored_block(ctx, &node_handle.store, block_number).await?; + node_handle + .store + .wait_until_persisted(ctx, block_number) + .await?; } tracing::trace!("All nodes received block #{block_number}"); block_number = block_number.next(); @@ -373,7 +346,10 @@ impl GossipNetworkTest for SwitchingOnNodes { // Wait until all switched on nodes get the new block. for node_handle in &mut switched_on_nodes { - wait_for_stored_block(ctx, &node_handle.store, block_number).await?; + node_handle + .store + .wait_until_persisted(ctx, block_number) + .await?; } tracing::trace!("All nodes received block #{block_number}"); block_number = block_number.next(); diff --git a/node/actors/sync_blocks/src/tests/mod.rs b/node/actors/sync_blocks/src/tests/mod.rs index deb1891c..54dd2fff 100644 --- a/node/actors/sync_blocks/src/tests/mod.rs +++ b/node/actors/sync_blocks/src/tests/mod.rs @@ -4,42 +4,17 @@ use rand::{ distributions::{Distribution, Standard}, Rng, }; -use std::{iter, ops}; -use zksync_concurrency::{oneshot, sync, testonly::abort_on_panic, time}; -use zksync_consensus_network::io::{GetBlockError, GetBlockResponse, SyncBlocksRequest}; -use zksync_consensus_roles::validator::{ - self, - testonly::{make_block, make_genesis_block}, - BlockHeader, BlockNumber, CommitQC, FinalBlock, Payload, ValidatorSet, -}; -use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; +use std::ops; +use zksync_concurrency::{oneshot, time}; +use zksync_consensus_network::io::GetBlockError; +use zksync_consensus_roles::validator::{self, testonly::GenesisSetup, BlockNumber, ValidatorSet}; +use zksync_consensus_storage::{BlockStore, BlockStoreRunner, BlockStoreState}; use zksync_consensus_utils::pipe; mod end_to_end; const TEST_TIMEOUT: time::Duration = time::Duration::seconds(20); -pub(crate) async fn make_store( - ctx: &ctx::Ctx, - genesis: FinalBlock, -) -> (Arc, BlockStoreRunner) { - let storage = in_memory::BlockStore::new(genesis); - BlockStore::new(ctx, Box::new(storage)).await.unwrap() -} - -pub(crate) async fn wait_for_stored_block( - ctx: &ctx::Ctx, - storage: &BlockStore, - block_number: BlockNumber, -) -> ctx::OrCanceled<()> { - tracing::trace!("Started waiting for stored block"); - sync::wait_for(ctx, &mut storage.subscribe(), |state| { - state.next() > block_number - }) - .await?; - Ok(()) -} - impl Distribution for Standard { fn sample(&self, rng: &mut R) -> Config { let validator_set: ValidatorSet = rng.gen(); @@ -48,185 +23,34 @@ impl Distribution for Standard { } } -#[derive(Debug, Clone)] -pub(crate) struct TestValidators { - validator_secret_keys: Vec, - pub(crate) validator_set: ValidatorSet, - pub(crate) final_blocks: Vec, +pub(crate) fn test_config(setup: &GenesisSetup) -> Config { + Config::new(setup.validator_set(), setup.keys.len()).unwrap() } -impl TestValidators { - pub(crate) fn new(rng: &mut impl Rng, validator_count: usize, block_count: usize) -> Self { - let validator_secret_keys: Vec = - (0..validator_count).map(|_| rng.gen()).collect(); - let validator_set = validator_secret_keys.iter().map(|sk| sk.public()); - let validator_set = ValidatorSet::new(validator_set).unwrap(); - - let mut this = Self { - 
validator_secret_keys, - validator_set, - final_blocks: vec![], - }; - - let payload = Payload(vec![]); - let mut latest_block = BlockHeader::genesis(payload.hash(), BlockNumber(0)); - let final_blocks = (0..block_count).map(|_| { - let final_block = FinalBlock { - payload: payload.clone(), - justification: this.certify_block(&latest_block), - }; - latest_block = BlockHeader::new(&latest_block, payload.hash()); - final_block - }); - this.final_blocks = final_blocks.collect(); - this - } - - pub(crate) fn test_config(&self) -> Config { - Config::new(self.validator_set.clone(), self.validator_secret_keys.len()).unwrap() - } - - fn certify_block(&self, proposal: &BlockHeader) -> CommitQC { - let message_to_sign = validator::ReplicaCommit { - protocol_version: validator::ProtocolVersion::EARLIEST, - view: validator::ViewNumber(proposal.number.0), - proposal: *proposal, - }; - let signed_messages: Vec<_> = self - .validator_secret_keys - .iter() - .map(|sk| sk.sign_msg(message_to_sign)) - .collect(); - CommitQC::from(&signed_messages, &self.validator_set).unwrap() - } - - pub(crate) fn sync_state(&self, last_block_number: usize) -> BlockStoreState { - self.snapshot_sync_state(1..=last_block_number) - } - - pub(crate) fn snapshot_sync_state( - &self, - block_numbers: ops::RangeInclusive, - ) -> BlockStoreState { - assert!(!block_numbers.is_empty()); - BlockStoreState { - first: self.final_blocks[*block_numbers.start()] - .justification - .clone(), - last: self.final_blocks[*block_numbers.end()] - .justification - .clone(), - } - } - - pub(crate) fn send_block( - &self, - number: BlockNumber, - response: oneshot::Sender, - ) { - let final_block = self.final_blocks[number.0 as usize].clone(); - match response.send(Ok(final_block)) { - Ok(()) => tracing::info!(?number, "responded to get_block()"), - Err(_) => tracing::info!(?number, "failed to respond to get_block()"), - } - } +pub(crate) fn sync_state(setup: &GenesisSetup, last_block_number: usize) -> BlockStoreState { + snapshot_sync_state(setup, 1..=last_block_number) } -#[tokio::test] -async fn subscribing_to_state_updates() { - abort_on_panic(); - - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let protocol_version = validator::ProtocolVersion::EARLIEST; - let genesis_block = make_genesis_block(rng, protocol_version); - let block_1 = make_block(rng, genesis_block.header(), protocol_version); - - let (storage, runner) = make_store(ctx, genesis_block.clone()).await; - let (actor_pipe, _dispatcher_pipe) = pipe::new(); - let mut state_subscriber = storage.subscribe(); - - let cfg: Config = rng.gen(); - scope::run!(ctx, |ctx, s| async { - s.spawn_bg(runner.run(ctx)); - s.spawn_bg(cfg.run(ctx, actor_pipe, storage.clone())); - s.spawn_bg(async { - assert!(ctx.sleep(TEST_TIMEOUT).await.is_err(), "Test timed out"); - anyhow::Ok(()) - }); - - let state = state_subscriber.borrow().clone(); - assert_eq!(state.first, genesis_block.justification); - assert_eq!(state.last, genesis_block.justification); - storage.queue_block(ctx, block_1.clone()).await.unwrap(); - - let state = sync::wait_for(ctx, &mut state_subscriber, |state| { - state.next() > block_1.header().number - }) - .await - .unwrap() - .clone(); - assert_eq!(state.first, genesis_block.justification); - assert_eq!(state.last, block_1.justification); - Ok(()) - }) - .await - .unwrap(); +pub(crate) fn snapshot_sync_state( + setup: &GenesisSetup, + range: ops::RangeInclusive, +) -> BlockStoreState { + assert!(!range.is_empty()); + BlockStoreState { + first: 
        setup.blocks[*range.start()].justification.clone(),
+        last: setup.blocks[*range.end()].justification.clone(),
+    }
 }
 
-#[tokio::test]
-async fn getting_blocks() {
-    abort_on_panic();
-
-    let ctx = &ctx::test_root(&ctx::RealClock);
-    let rng = &mut ctx.rng();
-    let protocol_version = validator::ProtocolVersion::EARLIEST;
-    let genesis_block = make_genesis_block(rng, protocol_version);
-
-    let (storage, runner) = make_store(ctx, genesis_block.clone()).await;
-    let (actor_pipe, dispatcher_pipe) = pipe::new();
-
-    let cfg: Config = rng.gen();
-    scope::run!(ctx, |ctx, s| async {
-        s.spawn_bg(runner.run(ctx));
-        let blocks = iter::successors(Some(genesis_block), |parent| {
-            Some(make_block(rng, parent.header(), protocol_version))
-        });
-        let blocks: Vec<_> = blocks.take(5).collect();
-        for block in &blocks {
-            storage.queue_block(ctx, block.clone()).await.unwrap();
-        }
-        s.spawn_bg(cfg.run(ctx, actor_pipe, storage.clone()));
-        s.spawn_bg(async {
-            assert!(ctx.sleep(TEST_TIMEOUT).await.is_err(), "Test timed out");
-            anyhow::Ok(())
-        });
-
-        for (i, expected_block) in blocks.iter().enumerate() {
-            let (response, response_receiver) = oneshot::channel();
-            let input_message = SyncBlocksRequest::GetBlock {
-                block_number: BlockNumber(i as u64),
-                response,
-            };
-            dispatcher_pipe.send.send(input_message.into());
-            let block = response_receiver.recv_or_disconnected(ctx).await???;
-            assert_eq!(block, *expected_block);
-        }
-
-        let (response, response_receiver) = oneshot::channel();
-        let input_message = SyncBlocksRequest::GetBlock {
-            block_number: BlockNumber(777),
-            response,
-        };
-        dispatcher_pipe.send.send(input_message.into());
-        let block_err = response_receiver
-            .recv_or_disconnected(ctx)
-            .await??
-            .unwrap_err();
-        assert!(matches!(block_err, GetBlockError::NotSynced));
-
-        Ok(())
-    })
-    .await
-    .unwrap();
+pub(crate) fn send_block(
+    setup: &GenesisSetup,
+    number: BlockNumber,
+    response: oneshot::Sender<Result<validator::FinalBlock, GetBlockError>>,
+) {
+    let block = setup
+        .blocks
+        .get(number.0 as usize)
+        .cloned()
+        .ok_or(GetBlockError::NotAvailable);
+    response.send(block).ok();
 }
diff --git a/node/libs/concurrency/src/ctx/channel.rs b/node/libs/concurrency/src/ctx/channel.rs
index fce34f3a..73d5abac 100644
--- a/node/libs/concurrency/src/ctx/channel.rs
+++ b/node/libs/concurrency/src/ctx/channel.rs
@@ -120,6 +120,11 @@ impl<T> Sender<T> {
     pub fn try_send(&self, v: T) -> Result<(), FullError> {
         self.0.try_send(v).map_err(|_| FullError)
     }
+
+    /// Waits until the receiver is dropped.
+    pub async fn closed(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> {
+        ctx.wait(self.0.closed()).await
+    }
 }
 
 impl<T> Receiver<T> {
diff --git a/node/libs/concurrency/src/oneshot.rs b/node/libs/concurrency/src/oneshot.rs
index 575a6b38..27c487f9 100644
--- a/node/libs/concurrency/src/oneshot.rs
+++ b/node/libs/concurrency/src/oneshot.rs
@@ -16,6 +16,18 @@ pub fn channel<T>() -> (Sender<T>, Receiver<T>) {
 }
 
 impl<T> Receiver<T> {
+    /// Waits for a message from the channel.
+    /// If the channel has been disconnected, waits for cancellation instead.
+    pub async fn recv(self, ctx: &ctx::Ctx) -> ctx::OrCanceled<T> {
+        ctx.wait(async move {
+            match self.0.await {
+                Ok(v) => v,
+                Err(_) => std::future::pending().await,
+            }
+        })
+        .await
+    }
+
     /// Awaits for a message from the channel.
     /// Returns an error if channel is empty and disconnected.
     pub async fn recv_or_disconnected(
diff --git a/node/libs/concurrency/src/signal.rs b/node/libs/concurrency/src/signal.rs
index b2adcd93..2c28dbb0 100644
--- a/node/libs/concurrency/src/signal.rs
+++ b/node/libs/concurrency/src/signal.rs
@@ -1,6 +1,7 @@
 //! Simple signal reporting primitive. A building block for `Scope` and `Ctx`.
 //! Can also be used outside of the crate, but only together with `Ctx`.
 use crate::ctx;
+use std::future::Future;
 
 /// Communication channel over which a signal can be sent only once.
 /// Useful for reporting very simple events.
@@ -37,8 +38,11 @@ impl Once {
     }
 
     /// Waits for the signal to be sent.
-    pub async fn recv(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> {
-        ctx.wait(self.cancel_safe_recv()).await
+    pub fn recv<'a>(
+        &'a self,
+        ctx: &'a ctx::Ctx,
+    ) -> ctx::CtxAware<impl 'a + Future<Output = ctx::OrCanceled<()>>> {
+        ctx.wait(self.cancel_safe_recv())
     }
 
     /// Checks if send() was already called.
diff --git a/node/libs/concurrency/src/sync/mod.rs b/node/libs/concurrency/src/sync/mod.rs
index 9960c6d7..f424b4b7 100644
--- a/node/libs/concurrency/src/sync/mod.rs
+++ b/node/libs/concurrency/src/sync/mod.rs
@@ -128,7 +128,11 @@ pub async fn wait_for<'a, T>(
     recv: &'a mut watch::Receiver<T>,
     pred: impl Fn(&T) -> bool,
 ) -> ctx::OrCanceled<watch::Ref<'a, T>> {
-    Ok(ctx.wait(recv.wait_for(pred)).await?.unwrap())
+    if let Ok(res) = ctx.wait(recv.wait_for(pred)).await? {
+        return Ok(res);
+    }
+    ctx.canceled().await;
+    Err(ctx::Canceled)
 }
 
 struct ExclusiveLockInner<T> {
diff --git a/node/libs/concurrency/src/testonly.rs b/node/libs/concurrency/src/testonly.rs
index 530fc46c..c726eef9 100644
--- a/node/libs/concurrency/src/testonly.rs
+++ b/node/libs/concurrency/src/testonly.rs
@@ -1,5 +1,4 @@
-//! Testonly utilities for concurrency crate.
-#![doc(hidden)]
+//! Testonly utilities for concurrent tests.
 use std::{future::Future, io::IsTerminal as _};
 
 /// Iff the current process is executed under
@@ -14,6 +13,7 @@ pub fn abort_on_panic() {
         .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
         .with_test_writer()
         .with_ansi(std::env::var("NO_COLOR").is_err() && std::io::stdout().is_terminal())
+        .with_line_number(true)
         .try_init();
 
     // I don't know a way to set panic=abort for nextest builds in compilation time, so we set it
@@ -35,6 +35,26 @@ pub fn abort_on_panic() {
     }));
 }
 
+/// Guard which has to be dropped before the timeout is reached.
+/// Otherwise the test will panic.
+#[allow(unused_tuple_struct_fields)]
+pub struct TimeoutGuard(std::sync::mpsc::Sender<()>);
+
+/// Panics if the (real time) timeout is reached before ctx is canceled.
+/// Implemented without using tokio, so that it cannot delay the timeout
+/// evaluation.
+pub fn set_timeout(timeout: time::Duration) -> TimeoutGuard {
+    use std::sync::mpsc;
+    let (send, recv) = mpsc::channel();
+    std::thread::spawn(move || {
+        if let Err(mpsc::RecvTimeoutError::Timeout) = recv.recv_timeout(timeout.try_into().unwrap())
+        {
+            panic!("TIMEOUT");
+        }
+    });
+    TimeoutGuard(send)
+}
+
 /// Executes a test under multiple configurations of the tokio runtime.
 pub fn with_runtimes<Fut: Future>(test: impl Fn() -> Fut) {
     for (name, mut b) in [
diff --git a/node/libs/crypto/src/bn254/mod.rs b/node/libs/crypto/src/bn254/mod.rs
index 66592b67..df02482f 100644
--- a/node/libs/crypto/src/bn254/mod.rs
+++ b/node/libs/crypto/src/bn254/mod.rs
@@ -31,6 +31,11 @@ mod testonly;
 pub struct SecretKey(Fr);
 
 impl SecretKey {
+    /// Generates a secret key from a cryptographically-secure entropy source.
+    pub fn generate() -> Self {
+        Self(rand04::Rand::rand(&mut rand04::OsRng::new().unwrap()))
+    }
+
     /// Gets the corresponding [`PublicKey`] for this [`SecretKey`]
     pub fn public(&self) -> PublicKey {
         let p = G2Affine::one().mul(self.0);
diff --git a/node/libs/crypto/src/ed25519/mod.rs b/node/libs/crypto/src/ed25519/mod.rs
index f4f11460..f8340524 100644
--- a/node/libs/crypto/src/ed25519/mod.rs
+++ b/node/libs/crypto/src/ed25519/mod.rs
@@ -65,13 +65,13 @@ impl ByteFmt for PublicKey {
     }
 
     fn encode(&self) -> Vec<u8> {
-        self.0.to_bytes().to_vec()
+        self.0.as_bytes().to_vec()
     }
 }
 
 impl std::hash::Hash for PublicKey {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        state.write(&self.0.to_bytes());
+        state.write(self.0.as_bytes());
     }
 }
 
@@ -85,6 +85,18 @@ impl PartialEq for PublicKey {
 
 impl Eq for PublicKey {}
 
+impl PartialOrd for PublicKey {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for PublicKey {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.0.as_bytes().cmp(other.0.as_bytes())
+    }
+}
+
 /// ed25519 signature.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Signature(ed::Signature);
diff --git a/node/libs/protobuf/src/testonly.rs b/node/libs/protobuf/src/testonly.rs
index 15272cd8..bae47cf0 100644
--- a/node/libs/protobuf/src/testonly.rs
+++ b/node/libs/protobuf/src/testonly.rs
@@ -26,7 +26,7 @@
 /// Syntax sugar for `test_encode`,
 /// because `test_encode(rng,&rng::gen())` doesn't compile.
 #[track_caller]
-pub fn test_encode_random<R: Rng, T: ProtoFmt + std::fmt::Debug + PartialEq>(rng: &mut R)
+pub fn test_encode_random<T: ProtoFmt + std::fmt::Debug + PartialEq>(rng: &mut impl Rng)
 where
     Standard: Distribution<T>,
 {
diff --git a/node/libs/roles/src/node/keys.rs b/node/libs/roles/src/node/keys.rs
index 0d8447a8..454ca2d3 100644
--- a/node/libs/roles/src/node/keys.rs
+++ b/node/libs/roles/src/node/keys.rs
@@ -13,6 +13,11 @@ use zksync_consensus_utils::enum_util::Variant;
 pub struct SecretKey(pub(super) Arc<ed25519::SecretKey>);
 
 impl SecretKey {
+    /// Generates a secret key from a cryptographically-secure entropy source.
+    pub fn generate() -> Self {
+        Self(Arc::new(ed25519::SecretKey::generate()))
+    }
+
     /// Sign a message hash.
     pub fn sign(&self, msg_hash: &MsgHash) -> Signature {
         Signature(self.0.sign(&ByteFmt::encode(msg_hash)))
@@ -28,11 +33,6 @@ impl SecretKey {
         }
     }
 
-    /// Generate a new random secret key.
-    pub fn generate() -> Self {
-        Self(Arc::new(ed25519::SecretKey::generate()))
-    }
-
     /// Get the public key corresponding to this secret key.
     pub fn public(&self) -> PublicKey {
         PublicKey(self.0.public())
@@ -64,7 +64,7 @@ impl fmt::Debug for SecretKey {
 }
 
 /// A node's public key.
-#[derive(Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct PublicKey(pub(super) ed25519::PublicKey);
 
 impl PublicKey {
diff --git a/node/libs/roles/src/node/tests.rs b/node/libs/roles/src/node/tests.rs
index e6ab6397..493d4ce8 100644
--- a/node/libs/roles/src/node/tests.rs
+++ b/node/libs/roles/src/node/tests.rs
@@ -37,10 +37,10 @@ fn test_text_encoding() {
 fn test_schema_encoding() {
     let ctx = &ctx::test_root(&ctx::RealClock);
     let rng = &mut ctx.rng();
-    test_encode_random::<_, Signed<Msg>>(rng);
+    test_encode_random::<Signed<Msg>>(rng);
     let key = rng.gen::<SecretKey>().public();
     test_encode(rng, &key);
-    test_encode_random::<_, Signature>(rng);
+    test_encode_random::<Signature>(rng);
 }
 
 #[test]
diff --git a/node/libs/roles/src/validator/keys/secret_key.rs b/node/libs/roles/src/validator/keys/secret_key.rs
index f5126cbf..644bde2a 100644
--- a/node/libs/roles/src/validator/keys/secret_key.rs
+++ b/node/libs/roles/src/validator/keys/secret_key.rs
@@ -1,6 +1,5 @@
 use super::{PublicKey, Signature};
 use crate::validator::messages::{Msg, MsgHash, Signed};
-use rand::Rng;
 use std::{fmt, sync::Arc};
 use zksync_consensus_crypto::{bn254, ByteFmt, Text, TextFmt};
 use zksync_consensus_utils::enum_util::Variant;
@@ -12,9 +11,9 @@ use zksync_consensus_utils::enum_util::Variant;
 pub struct SecretKey(pub(crate) Arc<bn254::SecretKey>);
 
 impl SecretKey {
-    /// Generate a new secret key.
-    pub fn generate<R: Rng>(rng: &mut R) -> Self {
-        Self(Arc::new(rng.gen()))
+    /// Generates a secret key from a cryptographically-secure entropy source.
+    pub fn generate() -> Self {
+        Self(Arc::new(bn254::SecretKey::generate()))
     }
 
     /// Public key corresponding to this secret key.
diff --git a/node/libs/roles/src/validator/mod.rs b/node/libs/roles/src/validator/mod.rs
index 2091a1ad..66ae9a12 100644
--- a/node/libs/roles/src/validator/mod.rs
+++ b/node/libs/roles/src/validator/mod.rs
@@ -9,3 +9,6 @@ mod messages;
 pub mod testonly;
 
 pub use self::{keys::*, messages::*};
+// TODO(gprusak): it should be ok to have an unsigned
+// genesis. For now we need a way to bootstrap the chain.
+pub use testonly::GenesisSetup;
diff --git a/node/libs/roles/src/validator/testonly.rs b/node/libs/roles/src/validator/testonly.rs
index fb62e749..57e24c98 100644
--- a/node/libs/roles/src/validator/testonly.rs
+++ b/node/libs/roles/src/validator/testonly.rs
@@ -33,16 +33,120 @@ pub fn make_justification(
     }
 }
 
-/// Constructs a genesis block with random payload.
-/// WARNING: it is not a fully correct FinalBlock.
-pub fn make_genesis_block<R: Rng>(rng: &mut R, protocol_version: ProtocolVersion) -> FinalBlock {
-    let payload: Payload = rng.gen();
-    let header = BlockHeader::genesis(payload.hash(), BlockNumber(0));
-    let justification = make_justification(rng, &header, protocol_version);
-    FinalBlock {
-        payload,
-        justification,
+impl<'a> BlockBuilder<'a> {
+    /// Signs the block with all the validator keys and appends it to the setup.
+    pub fn push(self) {
+        let msgs: Vec<_> = self
+            .setup
+            .keys
+            .iter()
+            .map(|sk| sk.sign_msg(self.msg))
+            .collect();
+        let justification = CommitQC::from(&msgs, &self.setup.validator_set()).unwrap();
+        self.setup.blocks.push(FinalBlock {
+            payload: self.payload,
+            justification,
+        });
+    }
+
+    /// Sets `protocol_version`.
+    pub fn protocol_version(mut self, protocol_version: ProtocolVersion) -> Self {
+        self.msg.protocol_version = protocol_version;
+        self
+    }
+
+    /// Sets `block_number`.
+    pub fn block_number(mut self, block_number: BlockNumber) -> Self {
+        self.msg.proposal.number = block_number;
+        self
+    }
+
+    /// Sets `payload`.
+ pub fn payload(mut self, payload: Payload) -> Self { + self.msg.proposal.payload = payload.hash(); + self.payload = payload; + self + } +} + +/// GenesisSetup. +#[derive(Debug, Clone)] +pub struct GenesisSetup { + /// Validators' secret keys. + pub keys: Vec, + /// Initial blocks. + pub blocks: Vec, +} + +/// Builder of GenesisSetup. +pub struct BlockBuilder<'a> { + setup: &'a mut GenesisSetup, + msg: ReplicaCommit, + payload: Payload, +} + +impl GenesisSetup { + /// Constructs GenesisSetup with no blocks. + pub fn empty(rng: &mut impl Rng, validators: usize) -> Self { + Self { + keys: (0..validators).map(|_| rng.gen()).collect(), + blocks: vec![], + } + } + + /// Constructs GenesisSetup with genesis block. + pub fn new(rng: &mut impl Rng, validators: usize) -> Self { + let mut this = Self::empty(rng, validators); + this.push_block(rng.gen()); + this + } + + /// Returns a builder for the next block. + pub fn next_block(&mut self) -> BlockBuilder { + let parent = self.blocks.last().map(|b| b.justification.message); + let payload = Payload(vec![]); + BlockBuilder { + setup: self, + msg: ReplicaCommit { + protocol_version: parent + .map(|m| m.protocol_version) + .unwrap_or(ProtocolVersion::EARLIEST), + view: parent.map(|m| m.view.next()).unwrap_or(ViewNumber(0)), + proposal: parent + .map(|m| BlockHeader::new(&m.proposal, payload.hash())) + .unwrap_or(BlockHeader::genesis(payload.hash(), BlockNumber(0))), + }, + payload, + } } + + /// Pushes the next block with the given payload. + pub fn push_block(&mut self, payload: Payload) { + self.next_block().payload(payload).push(); + } + + /// Pushes `count` blocks with a random payload. + pub fn push_blocks(&mut self, rng: &mut impl Rng, count: usize) { + for _ in 0..count { + self.push_block(rng.gen()); + } + } + + /// ValidatorSet. + pub fn validator_set(&self) -> ValidatorSet { + ValidatorSet::new(self.keys.iter().map(|k| k.public())).unwrap() + } +} + +/// Constructs a genesis block with random payload. +pub fn make_genesis_block(rng: &mut impl Rng, protocol_version: ProtocolVersion) -> FinalBlock { + let mut setup = GenesisSetup::new(rng, 3); + setup + .next_block() + .protocol_version(protocol_version) + .payload(rng.gen()) + .push(); + setup.blocks[0].clone() } /// Constructs a random block with a given parent. 
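
For context, the sketch below shows how the new `GenesisSetup` builder is meant to be used in a test. It is illustrative only (the test name and block counts are made up), not code from this patch:

    use zksync_concurrency::ctx;
    use zksync_consensus_roles::validator;

    #[test]
    fn genesis_setup_sketch() {
        let ctx = ctx::test_root(&ctx::RealClock);
        let rng = &mut ctx.rng();
        // 4 validators; GenesisSetup::new() already pushes a genesis block.
        let mut setup = validator::testonly::GenesisSetup::new(rng, 4);
        // Append 10 blocks with random payloads, each signed by all validators.
        setup.push_blocks(rng, 10);
        assert_eq!(11, setup.blocks.len());
        // Non-default fields go through the block builder.
        setup
            .next_block()
            .payload(validator::Payload(vec![]))
            .push();
        assert_eq!(12, setup.blocks.len());
    }
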
@@ -195,7 +299,7 @@
 
 impl Distribution<Payload> for Standard {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Payload {
-        let size: usize = rng.gen_range(0..11);
+        let size: usize = rng.gen_range(500..1000);
         Payload((0..size).map(|_| rng.gen()).collect())
     }
 }
diff --git a/node/libs/roles/src/validator/tests.rs b/node/libs/roles/src/validator/tests.rs
index 4ef8d885..dc26f1cf 100644
--- a/node/libs/roles/src/validator/tests.rs
+++ b/node/libs/roles/src/validator/tests.rs
@@ -90,19 +90,19 @@ fn test_text_encoding() {
 fn test_schema_encoding() {
     let ctx = ctx::test_root(&ctx::RealClock);
     let rng = &mut ctx.rng();
-    test_encode_random::<_, PayloadHash>(rng);
-    test_encode_random::<_, BlockHeader>(rng);
-    test_encode_random::<_, BlockHeaderHash>(rng);
-    test_encode_random::<_, FinalBlock>(rng);
-    test_encode_random::<_, Signed<ConsensusMsg>>(rng);
-    test_encode_random::<_, PrepareQC>(rng);
-    test_encode_random::<_, CommitQC>(rng);
-    test_encode_random::<_, Msg>(rng);
-    test_encode_random::<_, MsgHash>(rng);
-    test_encode_random::<_, Signers>(rng);
-    test_encode_random::<_, PublicKey>(rng);
-    test_encode_random::<_, Signature>(rng);
-    test_encode_random::<_, AggregateSignature>(rng);
+    test_encode_random::<PayloadHash>(rng);
+    test_encode_random::<BlockHeader>(rng);
+    test_encode_random::<BlockHeaderHash>(rng);
+    test_encode_random::<FinalBlock>(rng);
+    test_encode_random::<Signed<ConsensusMsg>>(rng);
+    test_encode_random::<PrepareQC>(rng);
+    test_encode_random::<CommitQC>(rng);
+    test_encode_random::<Msg>(rng);
+    test_encode_random::<MsgHash>(rng);
+    test_encode_random::<Signers>(rng);
+    test_encode_random::<PublicKey>(rng);
+    test_encode_random::<Signature>(rng);
+    test_encode_random::<AggregateSignature>(rng);
 }
 
 #[test]
@@ -113,8 +113,8 @@ fn test_signature_verify() {
     let msg1: MsgHash = rng.gen();
     let msg2: MsgHash = rng.gen();
 
-    let key1 = SecretKey::generate(rng);
-    let key2 = SecretKey::generate(rng);
+    let key1: SecretKey = rng.gen();
+    let key2: SecretKey = rng.gen();
 
     let sig1 = key1.sign_hash(&msg1);
 
@@ -136,8 +136,8 @@ fn test_agg_signature_verify() {
     let msg1: MsgHash = rng.gen();
     let msg2: MsgHash = rng.gen();
 
-    let key1 = SecretKey::generate(rng);
-    let key2 = SecretKey::generate(rng);
+    let key1: SecretKey = rng.gen();
+    let key2: SecretKey = rng.gen();
 
     let sig1 = key1.sign_hash(&msg1);
     let sig2 = key2.sign_hash(&msg2);
diff --git a/node/libs/storage/src/block_store/mod.rs b/node/libs/storage/src/block_store/mod.rs
index d56121d7..f58c551d 100644
--- a/node/libs/storage/src/block_store/mod.rs
+++ b/node/libs/storage/src/block_store/mod.rs
@@ -6,7 +6,7 @@ use zksync_consensus_roles::validator;
 mod metrics;
 
 /// State of the `BlockStore`: continuous range of blocks.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct BlockStoreState {
     /// Stored block with the lowest number.
     pub first: validator::CommitQC,
@@ -102,6 +102,11 @@ impl BlockStoreRunner {
                 .start();
             self.0.persistent.store_next_block(ctx, &block).await?;
             t.observe();
+            tracing::info!(
+                "stored block #{}: {:#?}",
+                block.header().number,
+                block.header().hash()
+            );
 
             self.0.inner.send_modify(|inner| {
                 debug_assert_eq!(inner.persisted_state.next(), block.header().number);
diff --git a/node/libs/storage/src/testonly/mod.rs b/node/libs/storage/src/testonly/mod.rs
index a4b1481d..5977f10e 100644
--- a/node/libs/storage/src/testonly/mod.rs
+++ b/node/libs/storage/src/testonly/mod.rs
@@ -1,6 +1,7 @@
 //!
-use crate::{PersistentBlockStore, Proposal, ReplicaState}; +use crate::{BlockStore, BlockStoreRunner, PersistentBlockStore, Proposal, ReplicaState}; use rand::{distributions::Standard, prelude::Distribution, Rng}; +use std::sync::Arc; use zksync_concurrency::ctx; use zksync_consensus_roles::validator; @@ -27,6 +28,16 @@ impl Distribution for Standard { } } +/// Constructs a new in-memory store with a genesis block. +pub async fn new_store( + ctx: &ctx::Ctx, + genesis: &validator::FinalBlock, +) -> (Arc, BlockStoreRunner) { + BlockStore::new(ctx, Box::new(in_memory::BlockStore::new(genesis.clone()))) + .await + .unwrap() +} + /// Dumps all the blocks stored in `store`. pub async fn dump(ctx: &ctx::Ctx, store: &dyn PersistentBlockStore) -> Vec { let range = store.state(ctx).await.unwrap(); @@ -40,19 +51,3 @@ pub async fn dump(ctx: &ctx::Ctx, store: &dyn PersistentBlockStore) -> Vec impl Iterator { - let mut rng = ctx.rng(); - let v = validator::ProtocolVersion::EARLIEST; - std::iter::successors( - Some(validator::testonly::make_genesis_block(&mut rng, v)), - move |parent| { - Some(validator::testonly::make_block( - &mut rng, - parent.header(), - v, - )) - }, - ) -} diff --git a/node/libs/storage/src/tests.rs b/node/libs/storage/src/tests.rs index fa94acdb..723fd250 100644 --- a/node/libs/storage/src/tests.rs +++ b/node/libs/storage/src/tests.rs @@ -1,13 +1,17 @@ use super::*; -use crate::ReplicaState; -use zksync_concurrency::ctx; +use crate::{testonly::new_store, ReplicaState}; +use zksync_concurrency::{ctx, scope, sync, testonly::abort_on_panic}; +use zksync_consensus_roles::validator; #[tokio::test] async fn test_inmemory_block_store() { let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); let store = &testonly::in_memory::BlockStore::default(); + let mut setup = validator::testonly::GenesisSetup::empty(rng, 3); + setup.push_blocks(rng, 5); let mut want = vec![]; - for block in testonly::random_blocks(ctx).take(5) { + for block in setup.blocks { store.store_next_block(ctx, &block).await.unwrap(); want.push(block); assert_eq!(want, testonly::dump(ctx, store).await); @@ -18,5 +22,39 @@ async fn test_inmemory_block_store() { fn test_schema_encode_decode() { let ctx = ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - zksync_protobuf::testonly::test_encode_random::<_, ReplicaState>(rng); + zksync_protobuf::testonly::test_encode_random::(rng); +} + +#[tokio::test] +async fn test_state_updates() { + abort_on_panic(); + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut genesis = validator::testonly::GenesisSetup::new(rng, 1); + genesis.push_blocks(rng, 1); + + let (store, runner) = new_store(ctx, &genesis.blocks[0]).await; + scope::run!(ctx, |ctx, s| async { + s.spawn_bg(runner.run(ctx)); + let sub = &mut store.subscribe(); + let state = sub.borrow().clone(); + assert_eq!(state.first, genesis.blocks[0].justification); + assert_eq!(state.last, genesis.blocks[0].justification); + + store + .queue_block(ctx, genesis.blocks[1].clone()) + .await + .unwrap(); + + let state = sync::wait_for(ctx, sub, |state| { + state.last == genesis.blocks[1].justification + }) + .await? + .clone(); + assert_eq!(state.first, genesis.blocks[0].justification); + assert_eq!(state.last, genesis.blocks[1].justification); + Ok(()) + }) + .await + .unwrap(); } diff --git a/node/tools/src/bin/keys.rs b/node/tools/src/bin/keys.rs index 2f2a0bfd..c0ba5832 100644 --- a/node/tools/src/bin/keys.rs +++ b/node/tools/src/bin/keys.rs @@ -1,15 +1,17 @@ //! 
This tool generates a validator key pair and prints it to stdout. #![allow(clippy::print_stdout)] +use crypto::TextFmt as _; use zksync_consensus_crypto as crypto; -use zksync_consensus_roles::validator; +use zksync_consensus_roles::{node, validator}; /// This tool generates a validator key pair and prints it to stdout. fn main() { - let key = validator::SecretKey::generate(&mut rand::rngs::OsRng); - let encoded_pk = crypto::TextFmt::encode(&key.public()); - let encoded_sk = crypto::TextFmt::encode(&key); - println!("Generating keypair:"); - println!("{encoded_pk}"); - println!("{encoded_sk}"); + let validator_key = validator::SecretKey::generate(); + let node_key = node::SecretKey::generate(); + println!("keys:"); + println!("{}", validator_key.encode()); + println!("{}", validator_key.public().encode()); + println!("{}", node_key.encode()); + println!("{}", node_key.public().encode()); } diff --git a/node/tools/src/bin/localnet_config.rs b/node/tools/src/bin/localnet_config.rs index 839e727d..c50d9d0f 100644 --- a/node/tools/src/bin/localnet_config.rs +++ b/node/tools/src/bin/localnet_config.rs @@ -3,7 +3,6 @@ use anyhow::Context as _; use clap::Parser; use rand::Rng; use std::{fs, net::SocketAddr, path::PathBuf}; -use zksync_consensus_bft::testonly; use zksync_consensus_crypto::TextFmt; use zksync_consensus_roles::{node, validator}; use zksync_consensus_tools::AppConfig; @@ -40,6 +39,9 @@ struct Args { /// Configs for , will be in directory // #[arg(long)] output_dir: PathBuf, + /// Block payload size in bytes. + #[arg(long, default_value_t = 1000000)] + payload_size: usize, } fn main() -> anyhow::Result<()> { @@ -60,16 +62,14 @@ fn main() -> anyhow::Result<()> { // Generate the keys for all the replicas. let rng = &mut rand::thread_rng(); - let validator_keys: Vec = (0..addrs.len()).map(|_| rng.gen()).collect(); - let node_keys: Vec = (0..addrs.len()).map(|_| rng.gen()).collect(); - // Generate the genesis block. - // TODO: generating genesis block shouldn't require knowing the private keys. - let (genesis, validator_set) = testonly::make_genesis( - &validator_keys, - validator::Payload(vec![]), - validator::BlockNumber(0), - ); + let mut genesis = validator::GenesisSetup::empty(rng, addrs.len()); + genesis + .next_block() + .payload(validator::Payload(vec![])) + .push(); + let validator_keys = genesis.keys.clone(); + let node_keys: Vec = (0..addrs.len()).map(|_| rng.gen()).collect(); // Each node will have `gossip_peers` outbound peers. 
let nodes = addrs.len(); @@ -81,8 +81,9 @@ fn main() -> anyhow::Result<()> { public_addr: addrs[i], metrics_server_addr, - validators: validator_set.clone(), - genesis_block: genesis.clone(), + validators: genesis.validator_set(), + genesis_block: genesis.blocks[0].clone(), + max_payload_size: args.payload_size, gossip_dynamic_inbound_limit: 0, gossip_static_inbound: [].into(), diff --git a/node/tools/src/config.rs b/node/tools/src/config.rs index d281e69e..16550f10 100644 --- a/node/tools/src/config.rs +++ b/node/tools/src/config.rs @@ -32,8 +32,9 @@ pub struct AppConfig { pub validators: validator::ValidatorSet, pub genesis_block: validator::FinalBlock, + pub max_payload_size: usize, - pub gossip_dynamic_inbound_limit: u64, + pub gossip_dynamic_inbound_limit: usize, pub gossip_static_inbound: HashSet, pub gossip_static_outbound: HashMap, } @@ -75,8 +76,12 @@ impl ProtoFmt for AppConfig { validators, genesis_block: read_required_text(&r.genesis_block).context("genesis_block")?, + max_payload_size: required(&r.max_payload_size) + .and_then(|x| Ok((*x).try_into()?)) + .context("max_payload_size")?, - gossip_dynamic_inbound_limit: *required(&r.gossip_dynamic_inbound_limit) + gossip_dynamic_inbound_limit: required(&r.gossip_dynamic_inbound_limit) + .and_then(|x| Ok((*x).try_into()?)) .context("gossip_dynamic_inbound_limit")?, gossip_static_inbound, gossip_static_outbound, @@ -91,8 +96,11 @@ impl ProtoFmt for AppConfig { validators: self.validators.iter().map(TextFmt::encode).collect(), genesis_block: Some(self.genesis_block.encode()), + max_payload_size: Some(self.max_payload_size.try_into().unwrap()), - gossip_dynamic_inbound_limit: Some(self.gossip_dynamic_inbound_limit), + gossip_dynamic_inbound_limit: Some( + self.gossip_dynamic_inbound_limit.try_into().unwrap(), + ), gossip_static_inbound: self .gossip_static_inbound .iter() @@ -185,6 +193,7 @@ impl Configs { gossip_dynamic_inbound_limit: self.app.gossip_dynamic_inbound_limit, gossip_static_inbound: self.app.gossip_static_inbound.clone(), gossip_static_outbound: self.app.gossip_static_outbound.clone(), + max_payload_size: self.app.max_payload_size, }, block_store, validator: self.validator_key.as_ref().map(|key| executor::Validator { @@ -193,7 +202,7 @@ impl Configs { public_addr: self.app.public_addr, }, replica_store: Box::new(store), - payload_manager: Box::new(bft::testonly::RandomPayload), + payload_manager: Box::new(bft::testonly::RandomPayload(self.app.max_payload_size)), }), }; Ok((e, runner)) diff --git a/node/tools/src/proto/mod.proto b/node/tools/src/proto/mod.proto index e857c8eb..3cac756d 100644 --- a/node/tools/src/proto/mod.proto +++ b/node/tools/src/proto/mod.proto @@ -69,6 +69,9 @@ message AppConfig { // Will be inserted to storage if not already present. optional string genesis_block = 6; // [required] FinalBlock + // Maximal size of the block payload. 
+  optional uint64 max_payload_size = 11; // [required] bytes
+
   // Gossip network
 
   // Limit on the number of gossip network inbound connections outside
diff --git a/node/tools/src/tests.rs b/node/tools/src/tests.rs
index 65760e3d..581f1ca2 100644
--- a/node/tools/src/tests.rs
+++ b/node/tools/src/tests.rs
@@ -5,7 +5,7 @@ use rand::{
     distributions::{Distribution, Standard},
     Rng,
 };
 use tempfile::TempDir;
 use zksync_concurrency::ctx;
-use zksync_consensus_roles::node;
+use zksync_consensus_roles::{node, validator::testonly::GenesisSetup};
 use zksync_consensus_storage::{testonly, PersistentBlockStore};
 use zksync_protobuf::testonly::test_encode_random;
@@ -30,6 +30,7 @@ impl Distribution<AppConfig> for Standard {
         gossip_static_outbound: (0..6)
             .map(|_| (rng.gen::<node::SecretKey>().public(), make_addr(rng)))
             .collect(),
+        max_payload_size: rng.gen(),
     }
 }
 
@@ -38,18 +39,21 @@
 fn test_schema_encoding() {
     let ctx = ctx::test_root(&ctx::RealClock);
     let rng = &mut ctx.rng();
-    test_encode_random::<_, AppConfig>(rng);
+    test_encode_random::<AppConfig>(rng);
 }
 
 #[tokio::test]
 async fn test_reopen_rocksdb() {
     let ctx = &ctx::test_root(&ctx::RealClock);
+    let rng = &mut ctx.rng();
     let dir = TempDir::new().unwrap();
+    let mut setup = GenesisSetup::empty(rng, 3);
+    setup.push_blocks(rng, 5);
     let mut want = vec![];
-    for b in testonly::random_blocks(ctx).take(5) {
+    for b in &setup.blocks {
         let store = store::RocksDB::open(dir.path()).await.unwrap();
-        store.store_next_block(ctx, &b).await.unwrap();
-        want.push(b);
+        store.store_next_block(ctx, b).await.unwrap();
+        want.push(b.clone());
         assert_eq!(want, testonly::dump(ctx, &store).await);
     }
 }
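
Taken together, the new testonly helpers (`GenesisSetup`, `new_store`, `set_timeout`, `wait_until_persisted`) compose as in the sketch below. This is illustrative only; the timeout value and block counts are arbitrary, and the test itself is not part of this patch:

    use zksync_concurrency::{ctx, scope, time};
    use zksync_concurrency::testonly::{abort_on_panic, set_timeout};
    use zksync_consensus_roles::validator::testonly::GenesisSetup;
    use zksync_consensus_storage::testonly::new_store;

    #[tokio::test]
    async fn store_roundtrip_sketch() {
        abort_on_panic();
        // Panics from a plain thread (not tokio) if the test overruns.
        let _guard = set_timeout(time::Duration::seconds(10));
        let ctx = &ctx::test_root(&ctx::RealClock);
        let rng = &mut ctx.rng();
        let mut setup = GenesisSetup::new(rng, 1);
        setup.push_blocks(rng, 1);
        // In-memory store seeded with the genesis block.
        let (store, runner) = new_store(ctx, &setup.blocks[0]).await;
        scope::run!(ctx, |ctx, s| async {
            s.spawn_bg(runner.run(ctx));
            store.queue_block(ctx, setup.blocks[1].clone()).await.unwrap();
            // Done once the persistent store has caught up.
            store
                .wait_until_persisted(ctx, setup.blocks[1].header().number)
                .await?;
            Ok(())
        })
        .await
        .unwrap();
    }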