Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test nodes leaving the network #1540

Merged
merged 8 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,21 @@ test_async_std_all:
echo Testing with async std executor
cargo test --features=full-ci --lib --bins --tests --benches --workspace --no-fail-fast -- --test-threads=1

_test_basic:
test_basic:
echo Testing with async std executor
RUST_LOG="" cargo test --features=full-ci --lib --bins --tests --benches --workspace --no-fail-fast test_basic -- --test-threads=1 --nocapture

test_web_server:
echo Testing with async std executor
RUST_LOG="" cargo test --features=full-ci --lib --bins --tests --benches --workspace --no-fail-fast web_server_network -- --test-threads=1 --nocapture

_test_basic_tokio:
test_basic_tokio:
echo Testing with tokio executor
RUST_LOG="" cargo test --features=tokio-ci --lib --bins --tests --benches --workspace --no-fail-fast test_basic -- --test-threads=1 --nocapture

test_with_failures:
echo Testing with async std executor
RUST_LOG="" cargo test --features=full-ci --lib --bins --tests --benches --workspace --no-fail-fast test_with_failures -- --test-threads=1 --nocapture
echo Testing nodes leaving the network with async std executor
RUST_LOG="" ASYNC_STD_THREAD_COUNT=1 cargo test --features=full-ci --lib --bins --tests --benches --workspace --no-fail-fast test_with_failures -- --test-threads=1 --nocapture

test_pkg := "hotshot"

Expand Down
2 changes: 1 addition & 1 deletion task/src/task_launcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ impl TaskRunner /* <N> */ {
}
}

// `name` is for logging purposes only and may be duplicated or inconsistent.
/// to support builder pattern
// pub fn add_task<HSTT: HotShotTaskTypes<Error = (dyn TaskErr + 'static)>>(&mut self, id: HotShotTaskId, name: String, builder: TaskBuilder<HSTT>) -> TaskRunner<N+1>{
#[must_use]
pub fn add_task(
mut self,
Expand Down
29 changes: 16 additions & 13 deletions testing/src/overall_safety_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use hotshot_task::{
};
use hotshot_types::{
certificate::QuorumCertificate,
data::LeafType,
data::{DeltasType, LeafType},
error::RoundTimedoutState,
event::{Event, EventType},
traits::node_implementation::NodeType,
Expand Down Expand Up @@ -248,7 +248,7 @@ impl<TYPES: NodeType, LEAF: LeafType<NodeType = TYPES>> RoundResult<TYPES, LEAF>
);

let mut result_state = None;
let mut result_block = None;
let mut result_commitment = None;

if check_state {
for (state, num_nodes) in states {
Expand All @@ -266,19 +266,22 @@ impl<TYPES: NodeType, LEAF: LeafType<NodeType = TYPES>> RoundResult<TYPES, LEAF>
}

if check_block {
for (block, num_nodes) in blocks.clone() {
if num_nodes >= threshold {
result_block = Some(block.clone());
self.success = true;
self.agreed_block = Some(block);
// Check if the block commitments are the same.
let mut consistent_block = None;
for (delta, _) in blocks.clone() {
let commitment = delta.block_commitment();
if let Some(consistent_commitment) = result_commitment {
if commitment != consistent_commitment {
self.success = false;
error!("Inconsistent blocks, blocks: {:?}", blocks);
return Err(OverallSafetyTaskErr::InconsistentBlocks);
}
}
result_commitment = Some(commitment);
consistent_block = Some(delta);
}

if result_block.is_none() {
self.success = false;
error!("Inconsistent blocks, blocks: {:?}", blocks);
return Err(OverallSafetyTaskErr::InconsistentBlocks);
}
self.success = true;
self.agreed_block = consistent_block;
}
Ok(())
}
Expand Down
3 changes: 1 addition & 2 deletions testing/src/spinning_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use hotshot_types::traits::node_implementation::NodeType;
use snafu::Snafu;

use crate::{test_launcher::TaskGenerator, test_runner::Node, GlobalTestEvent};

/// Error type for the spinning task. It declares no fields, so it carries no extra context.
#[derive(Snafu, Debug)]
pub struct SpinningTaskErr {}

Expand Down Expand Up @@ -121,7 +120,7 @@ impl SpinningTaskDescription {
},
));
let builder = TaskBuilder::<SpinningTaskTypes<TYPES, I>>::new(
"Spinning Nodes Task".to_string(),
"Test Spinning Task".to_string(),
)
.register_event_stream(test_event_stream, FilterEvent::default())
.await
Expand Down
26 changes: 26 additions & 0 deletions testing/src/test_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,32 @@ impl TestMetadata {
..TestMetadata::default()
}
}

/// Test metadata preset for a larger network: 20 nodes, requiring only 10 successful views.
pub fn default_more_nodes_less_success() -> TestMetadata {
    // A 40-second time limit, chosen to allow the expected number of successful views.
    let completion_task_description = CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
        TimeBasedCompletionTaskDescription {
            duration: Duration::from_secs(40),
        },
    );
    let overall_safety_properties = OverallSafetyPropertiesDescription {
        num_successful_views: 10,
        ..Default::default()
    };

    TestMetadata {
        total_nodes: 20,
        start_nodes: 20,
        num_bootstrap_nodes: 20,
        // Only the first 14 (i.e., 20 - f) nodes sit on the DA committee, leaving the last
        // 6 (i.e., f) nodes free to be shut down. This restriction can go once the issue
        // below is fixed.
        // TODO: Update message broadcasting to avoid hanging
        // <https://github.com/EspressoSystems/HotShot/issues/1567>
        da_committee_size: 14,
        completion_task_description,
        overall_safety_properties,
        ..TestMetadata::default()
    }
}
}

impl Default for TestMetadata {
Expand Down
6 changes: 3 additions & 3 deletions testing/src/test_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ where
test_event_stream.clone(),
)
.await;
task_runner = task_runner.add_task(id, "Completion Task".to_string(), task);
task_runner = task_runner.add_task(id, "Test Completion Task".to_string(), task);

// add spinning task
let spinning_task_state = crate::spinning_task::SpinningTask {
Expand All @@ -128,7 +128,7 @@ where
test_event_stream.clone(),
)
.await;
task_runner = task_runner.add_task(id, "Completion Task".to_string(), task);
task_runner = task_runner.add_task(id, "Test Spinning Task".to_string(), task);

// add safety task
let overall_safety_task_state = OverallSafetyTask {
Expand All @@ -142,7 +142,7 @@ where
test_event_stream.clone(),
)
.await;
task_runner = task_runner.add_task(id, "Overall Safety Task".to_string(), task);
task_runner = task_runner.add_task(id, "Test Overall Safety Task".to_string(), task);

// Start hotshot
// Goes through all nodes, but really only needs to call this on the leader node of the first view
Expand Down
163 changes: 126 additions & 37 deletions testing/tests/basic.rs
Original file line number Diff line number Diff line change
@@ -1,80 +1,169 @@
use hotshot_testing::node_types::SequencingMemoryImpl;

#[cfg(test)]
#[cfg_attr(
feature = "tokio-executor",
tokio::test(flavor = "multi_thread", worker_threads = 2)
)]
#[cfg_attr(feature = "async-std-executor", async_std::test)]
async fn test_basic() {
use hotshot_testing::{
node_types::{SequencingMemoryImpl, SequencingTestTypes},
test_builder::TestMetadata,
};

async_compatibility_layer::logging::setup_logging();
async_compatibility_layer::logging::setup_backtrace();
let metadata = hotshot_testing::test_builder::TestMetadata::default();
let metadata = TestMetadata::default();
metadata
.gen_launcher::<hotshot_testing::node_types::SequencingTestTypes, SequencingMemoryImpl>()
.gen_launcher::<SequencingTestTypes, SequencingMemoryImpl>()
.launch()
.run_test()
.await;
}

/// Test one node leaving the network.
#[cfg(test)]
#[cfg_attr(
feature = "tokio-executor",
tokio::test(flavor = "multi_thread", worker_threads = 2)
)]
#[cfg_attr(feature = "async-std-executor", async_std::test)]
// TODO Keyao (testing harness) Remove `#[ignore]` after making the test pass. It's currently
// hanging.
#[ignore]
async fn test_with_failures() {
async fn test_with_failures_one() {
use std::time::Duration;

use hotshot_testing::{
completion_task::TimeBasedCompletionTaskDescription, spinning_task::SpinningTaskDescription,
node_types::{SequencingMemoryImpl, SequencingTestTypes},
spinning_task::{ChangeNode, SpinningTaskDescription, UpDown},
test_builder::TestMetadata,
};

async_compatibility_layer::logging::setup_logging();
async_compatibility_layer::logging::setup_backtrace();
let mut metadata = hotshot_testing::test_builder::TestMetadata {
total_nodes: 20,
start_nodes: 20,
num_bootstrap_nodes: 20,
da_committee_size: 20,
completion_task_description: hotshot_testing::completion_task::CompletionTaskDescription::TimeBasedCompletionTaskBuilder(TimeBasedCompletionTaskDescription{duration: Duration::new(120, 0)}),
// overall_safety_properties: OverallSafetyPropertiesDescription {
// threshold_calculator: std::sync::Arc::new(|_, _| {10}),
// ..Default::default()
// },
..hotshot_testing::test_builder::TestMetadata::default()
let mut metadata = TestMetadata::default_more_nodes_less_success();
// The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shut down the
// remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
// following issue.
// TODO: Update message broadcasting to avoid hanging
// <https://github.com/EspressoSystems/HotShot/issues/1567>
let dead_nodes = vec![ChangeNode {
idx: 19,
updown: UpDown::Down,
}];

metadata.spinning_properties = SpinningTaskDescription {
node_changes: vec![(Duration::new(4, 0), dead_nodes)],
};
metadata
.gen_launcher::<SequencingTestTypes, SequencingMemoryImpl>()
.launch()
.run_test()
.await;
}

/// Test f/2 nodes leaving the network.
#[cfg(test)]
#[cfg_attr(
feature = "tokio-executor",
tokio::test(flavor = "multi_thread", worker_threads = 2)
)]
#[cfg_attr(feature = "async-std-executor", async_std::test)]
async fn test_with_failures_half_f() {
use std::time::Duration;

use hotshot_testing::{
node_types::{SequencingMemoryImpl, SequencingTestTypes},
spinning_task::{ChangeNode, SpinningTaskDescription, UpDown},
test_builder::TestMetadata,
};

async_compatibility_layer::logging::setup_logging();
async_compatibility_layer::logging::setup_backtrace();
let mut metadata = TestMetadata::default_more_nodes_less_success();
// The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shut down the
// remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
// following issue.
// TODO: Update message broadcasting to avoid hanging
// <https://github.com/EspressoSystems/HotShot/issues/1567>
let dead_nodes = vec![
hotshot_testing::spinning_task::ChangeNode {
idx: 0,
updown: hotshot_testing::spinning_task::UpDown::Down,
ChangeNode {
idx: 17,
updown: UpDown::Down,
},
ChangeNode {
idx: 18,
updown: UpDown::Down,
},
ChangeNode {
idx: 19,
updown: UpDown::Down,
},
];

metadata.spinning_properties = SpinningTaskDescription {
node_changes: vec![(Duration::new(4, 0), dead_nodes)],
};
metadata
.gen_launcher::<SequencingTestTypes, SequencingMemoryImpl>()
.launch()
.run_test()
.await;
}

/// Test f nodes leaving the network.
#[cfg(test)]
#[cfg_attr(
feature = "tokio-executor",
tokio::test(flavor = "multi_thread", worker_threads = 2)
)]
#[cfg_attr(feature = "async-std-executor", async_std::test)]
async fn test_with_failures_f() {
use std::time::Duration;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe we could add a quick comment that these failures purposefully don't include DA nodes. I feel like we may forget this in the future when we switch to a rotating DA committee, and then we'll wonder why these tests fail. :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, the failures actually include the DA nodes. The DA committee is the entire network for this test, so those 6 nodes to be killed are also DA nodes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now failures only include DA nodes, so I added a comment.


use hotshot_testing::{
node_types::{SequencingMemoryImpl, SequencingTestTypes},
spinning_task::{ChangeNode, SpinningTaskDescription, UpDown},
test_builder::TestMetadata,
};

async_compatibility_layer::logging::setup_logging();
async_compatibility_layer::logging::setup_backtrace();
let mut metadata = TestMetadata::default_more_nodes_less_success();
// The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shut down the
// remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
// following issue.
// TODO: Update message broadcasting to avoid hanging
// <https://github.com/EspressoSystems/HotShot/issues/1567>
let dead_nodes = vec![
ChangeNode {
idx: 14,
updown: UpDown::Down,
},
ChangeNode {
idx: 15,
updown: UpDown::Down,
},
hotshot_testing::spinning_task::ChangeNode {
idx: 1,
updown: hotshot_testing::spinning_task::UpDown::Down,
ChangeNode {
idx: 16,
updown: UpDown::Down,
},
hotshot_testing::spinning_task::ChangeNode {
idx: 2,
updown: hotshot_testing::spinning_task::UpDown::Down,
ChangeNode {
idx: 17,
updown: UpDown::Down,
},
hotshot_testing::spinning_task::ChangeNode {
idx: 3,
updown: hotshot_testing::spinning_task::UpDown::Down,
ChangeNode {
idx: 18,
updown: UpDown::Down,
},
hotshot_testing::spinning_task::ChangeNode {
idx: 4,
updown: hotshot_testing::spinning_task::UpDown::Down,
ChangeNode {
idx: 19,
updown: UpDown::Down,
},
];

metadata.spinning_properties = SpinningTaskDescription {
node_changes: vec![(std::time::Duration::new(4, 0), dead_nodes)],
node_changes: vec![(Duration::new(4, 0), dead_nodes)],
};
metadata
.gen_launcher::<hotshot_testing::node_types::SequencingTestTypes, hotshot_testing::node_types::SequencingMemoryImpl>()
.gen_launcher::<SequencingTestTypes, SequencingMemoryImpl>()
.launch()
.run_test()
.await;
Expand Down
Loading