From d41ac00e00ec66742255a746a25443e21bebe9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Fran=C3=A7a?= Date: Fri, 4 Oct 2024 17:01:08 +0100 Subject: [PATCH] docs: detailed use of get_justification in informal spec (#204) --- node/Cargo.lock | 24 +- spec/informal-spec/proposer.rs | 12 +- spec/informal-spec/replica.rs | 413 +++++++++++++++++---------------- spec/informal-spec/types.rs | 8 + 4 files changed, 240 insertions(+), 217 deletions(-) diff --git a/node/Cargo.lock b/node/Cargo.lock index 0bc1fdaf..7834f5fe 100644 --- a/node/Cargo.lock +++ b/node/Cargo.lock @@ -3215,7 +3215,7 @@ dependencies = [ [[package]] name = "tester" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "clap", @@ -3923,7 +3923,7 @@ dependencies = [ [[package]] name = "zksync_concurrency" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -3941,7 +3941,7 @@ dependencies = [ [[package]] name = "zksync_consensus_bft" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -3965,7 +3965,7 @@ dependencies = [ [[package]] name = "zksync_consensus_crypto" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "blst", @@ -3985,7 +3985,7 @@ dependencies = [ [[package]] name = "zksync_consensus_executor" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "async-trait", @@ -4007,7 +4007,7 @@ dependencies = [ [[package]] name = "zksync_consensus_network" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -4044,7 +4044,7 @@ dependencies = [ [[package]] name = "zksync_consensus_roles" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -4065,7 +4065,7 @@ dependencies = [ [[package]] name = "zksync_consensus_storage" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -4087,7 +4087,7 @@ dependencies = [ [[package]] name = "zksync_consensus_tools" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "async-trait", @@ -4122,7 +4122,7 @@ dependencies = [ [[package]] name = "zksync_consensus_utils" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "rand", @@ -4132,7 +4132,7 @@ dependencies = [ [[package]] name = "zksync_protobuf" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "bit-vec", @@ -4154,7 +4154,7 @@ dependencies = [ [[package]] name = "zksync_protobuf_build" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "heck", diff --git a/spec/informal-spec/proposer.rs b/spec/informal-spec/proposer.rs index 12268201..49b7d9cf 100644 --- a/spec/informal-spec/proposer.rs +++ b/spec/informal-spec/proposer.rs @@ -15,13 +15,21 @@ fn on_start(replica_state: &ReplicaState) { // Get the justification for this view. If we have both a commit QC // and a timeout QC for this view (highly unlikely), we should prefer // to use the commit QC. - let justification = replica_state.get_justification(cur_view); + let justification = replica_state.create_justification(); + + assert!(justification.view() == cur_view); // Get the block number and check if this must be a reproposal. let (block_number, opt_block_hash) = justification.get_implied_block(); // Propose only if you have collected all committed blocks so far. - assert!(block_number == self.committed_blocks.last().map_or(0,|b|b.commit_qc.vote.block_number+1)); + assert!( + block_number + == self + .committed_blocks + .last() + .map_or(0, |b| b.commit_qc.vote.block_number + 1) + ); // Now we create the block. let block = if opt_block_hash.is_some() { diff --git a/spec/informal-spec/replica.rs b/spec/informal-spec/replica.rs index 901c5dc2..5c6e692b 100644 --- a/spec/informal-spec/replica.rs +++ b/spec/informal-spec/replica.rs @@ -1,12 +1,5 @@ // Replica -/// A block with a matching valid certificate. -/// invariants: hash(block) == commit_qc.vote.block_hash -struct CommittedBlock { - block: Block, - commit_qc: CommitQC, -} - struct ReplicaState { // The view this replica is currently in. view: ViewNumber, @@ -40,217 +33,231 @@ enum Phase { Timeout } -// This is called when the replica starts. At the beginning of the consensus. -// It is a loop that takes incoming messages and calls the corresponding -// method for each message. -fn on_start(self) { - // Imagine there's a timer util that just has two states (finished or not) and - // counts down from some given duration. For example, it counts down from 1s. - // If that time elapses, the timer will change state to finished. - // If it is reset before that, it starts counting down again from 1s. - let timer = Timer::new(duration); - - // Get the current view. - let mut cur_view = self.view; - - loop { - // If the view has increased before the timeout, we reset the timer. - if cur_view < self.view { - cur_view = self.view; - timer.reset(); - } - - // If the timer has finished, we send a timeout vote. - // If this is the first view, we immediately timeout. This will force the replicas - // to synchronize right at the beginning and will provide a justification for the - // proposer at view 1. - // If we have already timed out, we don't need to send another timeout vote. - if (timer.is_finished() || cur_view == 0) && self.phase != Phase::Timeout { - let vote = TimeoutVote::new(self.view, - self.high_vote, - self.high_commit_qc); - // Update our state so that we can no longer vote commit in this view. - self.phase = Phase::Timeout; - - // Send the vote to all replicas (including ourselves). - self.send(vote); - } - - // Try to get a message from the message queue and process it. We don't - // detail the message queue structure since it's boilerplate. - if let Some(message) = message_queue.pop() { - match message { - Proposal(msg) => { - self.on_proposal(msg); - } - Commit(msg) => { - self.on_commit(msg); - } - Timeout(msg) => { - self.on_timeout(msg); - } - NewView(msg) => { - self.on_new_view(msg); +impl ReplicaState { + // This is called when the replica starts. At the beginning of the consensus. + // It is a loop that takes incoming messages and calls the corresponding + // method for each message. + fn on_start(&mut self) { + // Imagine there's a timer util that just has two states (finished or not) and + // counts down from some given duration. For example, it counts down from 1s. + // If that time elapses, the timer will change state to finished. + // If it is reset before that, it starts counting down again from 1s. + let timer = Timer::new(duration); + + // Get the current view. + let mut cur_view = self.view; + + loop { + // If the view has increased before the timeout, we reset the timer. + if cur_view < self.view { + cur_view = self.view; + timer.reset(); + } + + // If the timer has finished, we send a timeout vote. + // If this is the first view, we immediately timeout. This will force the replicas + // to synchronize right at the beginning and will provide a justification for the + // proposer at view 1. + // If we have already timed out, we don't need to send another timeout vote. + if (timer.is_finished() || cur_view == 0) && self.phase != Phase::Timeout { + let vote = TimeoutVote::new(self.view, + self.high_vote, + self.high_commit_qc); + + // Update our state so that we can no longer vote commit in this view. + self.phase = Phase::Timeout; + + // Send the vote to all replicas (including ourselves). + self.send(vote); + } + + // Try to get a message from the message queue and process it. We don't + // detail the message queue structure since it's boilerplate. + if let Some(message) = message_queue.pop() { + match message { + Proposal(msg) => { + self.on_proposal(msg); + } + Commit(msg) => { + self.on_commit(msg); + } + Timeout(msg) => { + self.on_timeout(msg); + } + NewView(msg) => { + self.on_new_view(msg); + } } } } + } + + fn on_proposal(&mut self, proposal: Proposal) { + // We only allow proposals for the current view if we have not voted in + // it yet. + assert!((proposal.view() == self.view && self.phase == Prepare) || proposal.view() > self.view); + + // We ignore proposals from the wrong leader. + assert!(proposal.leader() == leader(proposal.view())); + + // Check that the proposal is valid. + assert!(proposal.verify()); + + // Get the implied block number and hash (if any). + let (block_number, opt_block_hash) = proposal.justification.get_implied_block(); + + // Vote only if you have collected all committed blocks so far. + assert!(block_number == self.committed_blocks.last().map_or(0,|b|b.commit_qc.vote.block_number+1)); + + // Check if this is a reproposal or not, and do the necessary checks. + // As a side result, get the correct block hash. + let block_hash = match opt_block_hash { + Some(hash) => { + // This is a reproposal. We let the leader repropose blocks without sending + // them in the proposal (it sends only the number + hash). That allows a + // leader to repropose a block without having it stored. + // It is an optimization that allows us to not wait for a leader that has + // the previous proposal stored (which can take 4f views), and to somewhat + // speed up reproposals by skipping block broadcast. + // This only saves time because we have a gossip network running in parallel, + // and any time a replica is able to create a finalized block (by possessing + // both the block and the commit QC) it broadcasts the finalized block (this + // was meant to propagate the block to full nodes, but of course validators + // will end up receiving it as well). + // However, this can be difficult to model and we might want to just + // ignore the gossip network in the formal model. We will still have liveness + // but in the model we'll end up waiting 4f views to get a leader that has the + // previous block before proposing a new one. This is not that bad, since + // then we can be sure that the consensus will continue even if the gossip + // network is failing for some reason. + + // For sanity reasons, we'll check that there's no block in the proposal. + // But this check is completely unnecessary (in theory at least). + assert!(proposal.block.is_none()); + + hash + } + None => { + // This is a new proposal, so we need to verify it (i.e. execute it). + assert!(proposal.block.is_some()); + let block = proposal.block.unwrap(); + // To verify the block, replica just tries to apply it to the current + // state. Current state is the result of applying all the committed blocks until now. + assert!(self.verify_block(block_number, block)); + // We cache the new proposals, waiting for them to be committed. + self.cached_proposals.insert((block_number,proposal.block.hash()),block); + block.hash() + } + }; + + // Update the state. + let vote = CommitVote::new(proposal.view(), block_number, block_hash); + + self.view = proposal.view(); + self.phase = Phase::Commit; + self.high_vote = Some(vote); + match proposal.justification { + Commit(qc) => self.process_commit_qc(Some(qc)), + Timeout(qc) => { + self.process_commit_qc(qc.high_commit_qc); + self.high_timeout_qc = max(Some(qc), self.high_timeout_qc); + } + }; + + // Send the commit vote to all replicas (including ourselves). + self.send(vote); } -} - -fn on_proposal(self, proposal: Proposal) { - // We only allow proposals for the current view if we have not voted in - // it yet. - assert!((proposal.view() == self.view && self.phase == Prepare) || proposal.view() > self.view); - - // We ignore proposals from the wrong leader. - assert!(proposal.leader() == leader(proposal.view())); - - // Check that the proposal is valid. - assert!(proposal.verify()); - // Get the implied block number and hash (if any). - let (block_number, opt_block_hash) = proposal.justification.get_implied_block(); - - // Vote only if you have collected all committed blocks so far. - assert!(block_number == self.committed_blocks.last().map_or(0,|b|b.commit_qc.vote.block_number+1)); - - // Check if this is a reproposal or not, and do the necessary checks. - // As a side result, get the correct block hash. - let block_hash = match opt_block_hash { - Some(hash) => { - // This is a reproposal. We let the leader repropose blocks without sending - // them in the proposal (it sends only the number + hash). That allows a - // leader to repropose a block without having it stored. - // It is an optimization that allows us to not wait for a leader that has - // the previous proposal stored (which can take 4f views), and to somewhat - // speed up reproposals by skipping block broadcast. - // This only saves time because we have a gossip network running in parallel, - // and any time a replica is able to create a finalized block (by possessing - // both the block and the commit QC) it broadcasts the finalized block (this - // was meant to propagate the block to full nodes, but of course validators - // will end up receiving it as well). - // However, this can be difficult to model and we might want to just - // ignore the gossip network in the formal model. We will still have liveness - // but in the model we'll end up waiting 4f views to get a leader that has the - // previous block before proposing a new one. This is not that bad, since - // then we can be sure that the consensus will continue even if the gossip - // network is failing for some reason. - - // For sanity reasons, we'll check that there's no block in the proposal. - // But this check is completely unnecessary (in theory at least). - assert!(proposal.block.is_none()); - - hash + // Processed an (already verified) commit_qc received from the network + // as part of some message. It bumps the local high_commit_qc and if + // we have the proposal corresponding to this qc, we append it to the committed_blocks. + fn process_commit_qc(&mut self, qc_opt: Option) { + if let Some(qc) = qc_opt { + self.high_commit_qc = max(Some(qc), self.high_commit_qc); + let Some(block) = self.cached_proposals.get((qc.vote.block_number,qc.vote.block_hash)) else { return }; + if self.committed_blocks.len()==qc.vote.block_number { + self.committed_blocks.push(CommittedBlock{block,commit_qc:qc}); + } } - None => { - // This is a new proposal, so we need to verify it (i.e. execute it). - assert!(proposal.block.is_some()); - let block = proposal.block.unwrap(); - // To verify the block, replica just tries to apply it to the current - // state. Current state is the result of applying all the committed blocks until now. - assert!(self.verify_block(block_number, block)); - // We cache the new proposals, waiting for them to be committed. - self.cached_proposals.insert((block_number,proposal.block.hash()),block); - block.hash() + } + + fn on_commit(&mut self, sig_vote: SignedCommitVote) { + // If the vote isn't current, just ignore it. + assert!(sig_vote.view() >= self.view) + + // Check that the signed vote is valid. + assert!(sig_vote.verify()); + + // Store the vote. We will never store duplicate (same view and sender) votes. + // If we already have this vote, we exit early. + assert!(self.store(sig_vote).is_ok()); + + // Check if we now have a commit QC for this view. + if let Some(qc) = self.get_commit_qc(sig_vote.view()) { + self.process_commit_qc(Some(qc)); + self.start_new_view(sig_vote.view() + 1); } - }; - - // Update the state. - let vote = CommitVote::new(proposal.view(), block_number, block_hash); - - self.view = proposal.view(); - self.phase = Phase::Commit; - self.high_vote = Some(vote); - match proposal.justification { - Commit(qc) => self.process_commit_qc(Some(qc)), - Timeout(qc) => { + } + + fn on_timeout(&mut self, sig_vote: SignedTimeoutVote) { + // If the vote isn't current, just ignore it. + assert!(sig_vote.view() >= self.view) + + // Check that the signed vote is valid. + assert!(sig_vote.verify()); + + // Store the vote. We will never store duplicate (same view and sender) votes. + // If we already have this vote, we exit early. + assert!(self.store(sig_vote).is_ok()); + + // Check if we now have a timeout QC for this view. + if let Some(qc) = self.get_timeout_qc(sig_vote.view()) { self.process_commit_qc(qc.high_commit_qc); self.high_timeout_qc = max(Some(qc), self.high_timeout_qc); - } - }; - - // Send the commit vote to all replicas (including ourselves). - self.send(vote); -} - -// Processed an (already verified) commit_qc received from the network -// as part of some message. It bumps the local high_commit_qc and if -// we have the proposal corresponding to this qc, we append it to the committed_blocks. -fn process_commit_qc(self, qc_opt: Option) { - if let Some(qc) = qc_opt { - self.high_commit_qc = max(Some(qc), self.high_commit_qc); - let Some(block) = self.cached_proposals.get((qc.vote.block_number,qc.vote.block_hash)) else { return }; - if self.committed_blocks.len()==qc.vote.block_number { - self.committed_blocks.push(CommittedBlock{block,commit_qc:qc}); + self.start_new_view(sig_vote.view() + 1); } } -} - -fn on_commit(self, sig_vote: SignedCommitVote) { - // If the vote isn't current, just ignore it. - assert!(sig_vote.view() >= self.view) - - // Check that the signed vote is valid. - assert!(sig_vote.verify()); - - // Store the vote. We will never store duplicate (same view and sender) votes. - // If we already have this vote, we exit early. - assert!(self.store(sig_vote).is_ok()); - - // Check if we now have a commit QC for this view. - if let Some(qc) = self.get_commit_qc(sig_vote.view()) { - self.process_commit_qc(Some(qc)); - self.start_new_view(sig_vote.view() + 1); + + fn on_new_view(&mut self, new_view: NewView) { + // If the message isn't current, just ignore it. + assert!(new_view.view() >= self.view) + + // Check that the new view is valid. + assert!(new_view.verify()); + + // Update our state. + match new_view.justification { + Commit(qc) => self.process_commit_qc(Some(qc)), + Timeout(qc) => { + self.process_commit_qc(qc.high_commit_qc); + self.high_timeout_qc = max(Some(qc), self.high_timeout_qc); + } + }; + + if new_view.view() > self.view { + self.start_new_view(new_view.view()); + } } -} - -fn on_timeout(self, sig_vote: SignedTimeoutVote) { - // If the vote isn't current, just ignore it. - assert!(sig_vote.view() >= self.view) - - // Check that the signed vote is valid. - assert!(sig_vote.verify()); - - // Store the vote. We will never store duplicate (same view and sender) votes. - // If we already have this vote, we exit early. - assert!(self.store(sig_vote).is_ok()); - - // Check if we now have a timeout QC for this view. - if let Some(qc) = self.get_timeout_qc(sig_vote.view()) { - self.process_commit_qc(qc.high_commit_qc); - self.high_timeout_qc = max(Some(qc), self.high_timeout_qc); - self.start_new_view(sig_vote.view() + 1); + + fn start_new_view(&mut self, view: ViewNumber) { + self.view = view; + self.phase = Phase::Prepare; + + // Send a new view message to the other replicas, for synchronization. + let new_view = NewView::new(self.get_justification(view)); + + self.send(new_view); } -} -fn on_new_view(self, new_view: NewView) { - // If the message isn't current, just ignore it. - assert!(new_view.view() >= self.view) + fn create_justification(&self) { + // We need some QC in order to be able to create a justification. + assert!(self.high_commit_qc.is_some() || self.high_timeout_qc.is_some()); - // Check that the new view is valid. - assert!(new_view.verify()); - - // Update our state. - match new_view.justification { - Commit(qc) => self.process_commit_qc(Some(qc)), - Timeout(qc) => { - self.process_commit_qc(qc.high_commit_qc); - self.high_timeout_qc = max(Some(qc), self.high_timeout_qc); + if self.high_commit_qc.map(|x| x.view()) >= self.high_timeout_qc.map(|x| x.view()) { + Justification::Commit(self.high_commit_qc.unwrap()) + } else { + Justification::Timeout(self.high_timeout_qc.unwrap()) } - }; - - if new_view.view() > self.view { - self.start_new_view(new_view.view()); } -} - -fn start_new_view(self, view: ViewNumber) { - self.view = view; - self.phase = Phase::Prepare; - - // Send a new view message to the other replicas, for synchronization. - let new_view = NewView::new(self.get_justification(view)); - - self.send(new_view); -} +} \ No newline at end of file diff --git a/spec/informal-spec/types.rs b/spec/informal-spec/types.rs index 7dd9885e..f5789cff 100644 --- a/spec/informal-spec/types.rs +++ b/spec/informal-spec/types.rs @@ -8,6 +8,13 @@ const QUORUM_WEIGHT = TOTAL_WEIGHT - FAULTY_WEIGHT; // The weight threshold needed to trigger a reproposal. const SUBQUORUM_WEIGHT = TOTAL_WEIGHT - 3 * FAULTY_WEIGHT; +/// A block with a matching valid certificate. +/// invariants: hash(block) == commit_qc.vote.block_hash +struct CommittedBlock { + block: Block, + commit_qc: CommitQC, +} + // Messages struct Proposal { @@ -39,6 +46,7 @@ enum Justification { // A timeout QC is just a collection of timeout votes (with at least // QUORUM_WEIGHT) for the previous view. Unlike with the Commit QC, // timeout votes don't need to be identical. + // The first proposal, for view 0, will always be a timeout. Timeout(TimeoutQC), }