Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.0 -> main] Logging improvements #666

Merged
merged 11 commits into from
Aug 29, 2024
27 changes: 20 additions & 7 deletions libraries/chain/finality/finalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,24 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) {
res.liveness_check = bsp->core.latest_qc_block_timestamp() > fsi.lock.timestamp;

if (!res.liveness_check) {
fc_ilog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}",
fc_dlog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}",
("bn", bsp->block_num())("id", bsp->id())("c", bsp->core.latest_qc_block_timestamp())("l", fsi.lock.timestamp)
("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)
("qc", bsp->core.latest_qc_claim()));

// Safety check : check if this proposal extends the proposal we're locked on
res.safety_check = bsp->core.extends(fsi.lock.block_id);
if (!res.safety_check) {
fc_wlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}",
fc_dlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
}
}
} else {
// Safety and Liveness both fail if `fsi.lock` is empty. It should not happen.
// `fsi.lock` is initially set to `lib` when switching to IF or starting from a snapshot.
// -------------------------------------------------------------------------------------
fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", ("bn", bsp->block_num())("id", bsp->id()));
fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty",
("bn", bsp->block_num())("id", bsp->id()));
res.liveness_check = false;
res.safety_check = false;
}
Expand Down Expand Up @@ -94,9 +95,21 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) {
fsi.last_vote = bsp->make_block_ref();
}

fc_dlog(vote_logger, "block=${bn} ${id}, liveness_check=${l}, safety_check=${s}, monotony_check=${m}, can vote=${can_vote}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)("m",res.monotony_check)
("can_vote",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
if (res.liveness_check) {
fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)
("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
} else if (can_vote) {
fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)
("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
} else {
fc_ilog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, "
"${ct} <= ${lt}, locked=${lbn} ${lid}, latest_qc_claim: ${qc}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)
("cn",can_vote)("v", res.decision)("ct", bsp->core.latest_qc_block_timestamp())("lt", fsi.lock.timestamp)
("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)("qc", bsp->core.latest_qc_claim()));
}
return res;
}

Expand Down Expand Up @@ -326,7 +339,7 @@ my_finalizers_t::fsi_map my_finalizers_t::load_finalizer_safety_info() {

// close file after write
cfile_ds.close();
} FC_LOG_AND_RETHROW()
} FC_RETHROW_EXCEPTIONS(log_level::error, "corrupted finalizer safety persistence file ${p}", ("p", persist_file_path))
// don't remove file we can't load
return res;
}
Expand Down
10 changes: 7 additions & 3 deletions plugins/chain_plugin/tracked_votes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ namespace eosio::chain_apis {
return;

if (!block->contains_extension(chain::quorum_certificate_extension::extension_id())) {
fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no votes",
("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer)
("l", (now - block->timestamp).count() / 1000));
if (chain::vote_logger.is_enabled(fc::log_level::info)) {
std::optional<chain::block_header_extension> fin_ext = block->extract_header_extension(chain::finality_extension::extension_id());
chain::qc_claim_t claim = fin_ext ? std::get<chain::finality_extension>(*fin_ext).qc_claim : chain::qc_claim_t{};
fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no qc, claim: ${c}",
("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer)
("l", (now - block->timestamp).count() / 1000)("c", claim));
}
return;
}

Expand Down
51 changes: 31 additions & 20 deletions plugins/producer_plugin/producer_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,23 +755,32 @@ class producer_plugin_impl : public std::enable_shared_from_this<producer_plugin
}
}

// true if paused due to not receiving votes
bool is_implicitly_paused() const {
enum class implicit_pause {
not_paused, // not implicitly paused
prod_paused, // paused due to not receiving vote associated with producer
other_paused // paused due to not receiving any vote from finalizers not associated with producer
};
implicit_pause implicitly_paused() const {
if (_producers.empty() || _production_pause_vote_timeout.count() == 0)
return false;
return implicit_pause::not_paused;
if (!_is_savanna_active)
return false; // no implicit pause in legacy
return implicit_pause::not_paused; // no implicit pause in legacy
if (_producers.contains(eosio::chain::config::system_account_name)) // disable implicit pause for eosio
return false;
return implicit_pause::not_paused;

auto last_producer_vote_received = _last_producer_vote_received.load(std::memory_order_relaxed);
auto last_other_vote_received = _last_other_vote_received.load(std::memory_order_relaxed);

// need a vote within timeout of last accepted block
return (_is_producer_active_finalizer &&
_accepted_block_time - last_producer_vote_received > _production_pause_vote_timeout)
|| (_other_active_finalizers &&
_accepted_block_time - last_other_vote_received > _production_pause_vote_timeout);
if (_is_producer_active_finalizer &&
_accepted_block_time - last_producer_vote_received > _production_pause_vote_timeout) {
return implicit_pause::prod_paused;
}
if (_other_active_finalizers &&
_accepted_block_time - last_other_vote_received > _production_pause_vote_timeout) {
return implicit_pause::other_paused;
}
return implicit_pause::not_paused;
}

void abort_block() {
Expand Down Expand Up @@ -1598,7 +1607,7 @@ void producer_plugin::resume() {
}

bool producer_plugin::paused() const {
return my->_pause_production || my->is_implicitly_paused();
return my->_pause_production || (my->implicitly_paused() != producer_plugin_impl::implicit_pause::not_paused);
}

void producer_plugin_impl::update_runtime_options(const producer_plugin::runtime_options& options) {
Expand Down Expand Up @@ -1963,28 +1972,30 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
} else if (_producers.find(scheduled_producer.producer_name) == _producers.end()) {
_pending_block_mode = pending_block_mode::speculating;
} else if (num_relevant_signatures == 0) {
fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}",
("authority", scheduled_producer.authority));
fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}, block ${t}",
("authority", scheduled_producer.authority)("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_pause_production) {
fc_wlog(_log, "Not producing block because production is explicitly paused");
fc_wlog(_log, "Not producing block because production is explicitly paused, block ${t}", ("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_max_irreversible_block_age_us.count() >= 0 && irreversible_block_age >= _max_irreversible_block_age_us) {
fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s]",
("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000));
fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s], block ${t}",
("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000)
("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (is_implicitly_paused()) {
fc_elog(_log, "Not producing block because no recent votes, last producer vote ${pv}, other votes ${ov}, last block time ${bt}",
("pv", _last_producer_vote_received.load(std::memory_order_relaxed))
} else if (implicit_pause p = implicitly_paused(); p != implicit_pause::not_paused) {
std::string reason = p == implicit_pause::prod_paused ? "producer votes" : "votes received";
fc_elog(_log, "Not producing block because no recent ${r}, block ${t}, last producer vote ${pv}, other votes ${ov}, last accepted block time ${bt}",
("r", std::move(reason))("t", block_time)("pv", _last_producer_vote_received.load(std::memory_order_relaxed))
("ov", _last_other_vote_received.load(std::memory_order_relaxed))("bt", _accepted_block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_max_reversible_blocks > 0 && head_block_num - head.irreversible_blocknum() > _max_reversible_blocks) {
fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}.",
("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum()));
fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}, block ${t}",
("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum())("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/liveness_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# test relies on production continuing
extraNodeosArgs=" --production-pause-vote-timeout-ms 0 "
# "mesh" shape connects nodeA and nodeB to each other
if cluster.launch(topo="mesh", pnodes=totalProducerNodes,
if cluster.launch(topo="mesh", pnodes=totalProducerNodes, extraNodeosArgs=extraNodeosArgs,
totalNodes=totalNodes, totalProducers=totalProducerNodes, loadSystemContract=False,
activateIF=activateIF, biosFinalizer=False) is False:
Utils.cmdError("launcher")
Expand Down