Skip to content

Commit

Permalink
Merge pull request #666 from AntelopeIO/logging-improvements-rc3-main
Browse files Browse the repository at this point in the history
[1.0 -> main] Logging improvements
  • Loading branch information
heifner authored Aug 29, 2024
2 parents f5a0638 + 25465e3 commit b118afd
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 31 deletions.
27 changes: 20 additions & 7 deletions libraries/chain/finality/finalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,24 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) {
res.liveness_check = bsp->core.latest_qc_block_timestamp() > fsi.lock.timestamp;

if (!res.liveness_check) {
fc_ilog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}",
fc_dlog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}",
("bn", bsp->block_num())("id", bsp->id())("c", bsp->core.latest_qc_block_timestamp())("l", fsi.lock.timestamp)
("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)
("qc", bsp->core.latest_qc_claim()));

// Safety check : check if this proposal extends the proposal we're locked on
res.safety_check = bsp->core.extends(fsi.lock.block_id);
if (!res.safety_check) {
fc_wlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}",
fc_dlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
}
}
} else {
// Safety and Liveness both fail if `fsi.lock` is empty. It should not happen.
// `fsi.lock` is initially set to `lib` when switching to IF or starting from a snapshot.
// -------------------------------------------------------------------------------------
fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", ("bn", bsp->block_num())("id", bsp->id()));
fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty",
("bn", bsp->block_num())("id", bsp->id()));
res.liveness_check = false;
res.safety_check = false;
}
Expand Down Expand Up @@ -94,9 +95,21 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) {
fsi.last_vote = bsp->make_block_ref();
}

fc_dlog(vote_logger, "block=${bn} ${id}, liveness_check=${l}, safety_check=${s}, monotony_check=${m}, can vote=${can_vote}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)("m",res.monotony_check)
("can_vote",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
if (res.liveness_check) {
fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)
("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
} else if (can_vote) {
fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)
("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id));
} else {
fc_ilog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, "
"${ct} <= ${lt}, locked=${lbn} ${lid}, latest_qc_claim: ${qc}",
("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)
("cn",can_vote)("v", res.decision)("ct", bsp->core.latest_qc_block_timestamp())("lt", fsi.lock.timestamp)
("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)("qc", bsp->core.latest_qc_claim()));
}
return res;
}

Expand Down Expand Up @@ -326,7 +339,7 @@ my_finalizers_t::fsi_map my_finalizers_t::load_finalizer_safety_info() {

// close file after write
cfile_ds.close();
} FC_LOG_AND_RETHROW()
} FC_RETHROW_EXCEPTIONS(log_level::error, "corrupted finalizer safety persistence file ${p}", ("p", persist_file_path))
// don't remove file we can't load
return res;
}
Expand Down
10 changes: 7 additions & 3 deletions plugins/chain_plugin/tracked_votes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ namespace eosio::chain_apis {
return;

if (!block->contains_extension(chain::quorum_certificate_extension::extension_id())) {
fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no votes",
("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer)
("l", (now - block->timestamp).count() / 1000));
if (chain::vote_logger.is_enabled(fc::log_level::info)) {
std::optional<chain::block_header_extension> fin_ext = block->extract_header_extension(chain::finality_extension::extension_id());
chain::qc_claim_t claim = fin_ext ? std::get<chain::finality_extension>(*fin_ext).qc_claim : chain::qc_claim_t{};
fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no qc, claim: ${c}",
("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer)
("l", (now - block->timestamp).count() / 1000)("c", claim));
}
return;
}

Expand Down
51 changes: 31 additions & 20 deletions plugins/producer_plugin/producer_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,23 +755,32 @@ class producer_plugin_impl : public std::enable_shared_from_this<producer_plugin
}
}

// true if paused due to not receiving votes
bool is_implicitly_paused() const {
enum class implicit_pause {
not_paused, // not implicitly paused
prod_paused, // paused due to not receiving vote associated with producer
other_paused // paused due to not receiving any vote from finalizers not associated with producer
};
implicit_pause implicitly_paused() const {
if (_producers.empty() || _production_pause_vote_timeout.count() == 0)
return false;
return implicit_pause::not_paused;
if (!_is_savanna_active)
return false; // no implicit pause in legacy
return implicit_pause::not_paused; // no implicit pause in legacy
if (_producers.contains(eosio::chain::config::system_account_name)) // disable implicit pause for eosio
return false;
return implicit_pause::not_paused;

auto last_producer_vote_received = _last_producer_vote_received.load(std::memory_order_relaxed);
auto last_other_vote_received = _last_other_vote_received.load(std::memory_order_relaxed);

// need a vote within timeout of last accepted block
return (_is_producer_active_finalizer &&
_accepted_block_time - last_producer_vote_received > _production_pause_vote_timeout)
|| (_other_active_finalizers &&
_accepted_block_time - last_other_vote_received > _production_pause_vote_timeout);
if (_is_producer_active_finalizer &&
_accepted_block_time - last_producer_vote_received > _production_pause_vote_timeout) {
return implicit_pause::prod_paused;
}
if (_other_active_finalizers &&
_accepted_block_time - last_other_vote_received > _production_pause_vote_timeout) {
return implicit_pause::other_paused;
}
return implicit_pause::not_paused;
}

void abort_block() {
Expand Down Expand Up @@ -1598,7 +1607,7 @@ void producer_plugin::resume() {
}

bool producer_plugin::paused() const {
return my->_pause_production || my->is_implicitly_paused();
return my->_pause_production || (my->implicitly_paused() != producer_plugin_impl::implicit_pause::not_paused);
}

void producer_plugin_impl::update_runtime_options(const producer_plugin::runtime_options& options) {
Expand Down Expand Up @@ -1963,28 +1972,30 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
} else if (_producers.find(scheduled_producer.producer_name) == _producers.end()) {
_pending_block_mode = pending_block_mode::speculating;
} else if (num_relevant_signatures == 0) {
fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}",
("authority", scheduled_producer.authority));
fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}, block ${t}",
("authority", scheduled_producer.authority)("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_pause_production) {
fc_wlog(_log, "Not producing block because production is explicitly paused");
fc_wlog(_log, "Not producing block because production is explicitly paused, block ${t}", ("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_max_irreversible_block_age_us.count() >= 0 && irreversible_block_age >= _max_irreversible_block_age_us) {
fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s]",
("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000));
fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s], block ${t}",
("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000)
("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (is_implicitly_paused()) {
fc_elog(_log, "Not producing block because no recent votes, last producer vote ${pv}, other votes ${ov}, last block time ${bt}",
("pv", _last_producer_vote_received.load(std::memory_order_relaxed))
} else if (implicit_pause p = implicitly_paused(); p != implicit_pause::not_paused) {
std::string reason = p == implicit_pause::prod_paused ? "producer votes" : "votes received";
fc_elog(_log, "Not producing block because no recent ${r}, block ${t}, last producer vote ${pv}, other votes ${ov}, last accepted block time ${bt}",
("r", std::move(reason))("t", block_time)("pv", _last_producer_vote_received.load(std::memory_order_relaxed))
("ov", _last_other_vote_received.load(std::memory_order_relaxed))("bt", _accepted_block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
} else if (_max_reversible_blocks > 0 && head_block_num - head.irreversible_blocknum() > _max_reversible_blocks) {
fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}.",
("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum()));
fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}, block ${t}",
("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum())("t", block_time));
_pending_block_mode = pending_block_mode::speculating;
not_producing_when_time = true;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/liveness_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# test relies on production continuing
extraNodeosArgs=" --production-pause-vote-timeout-ms 0 "
# "mesh" shape connects nodeA and nodeB to each other
if cluster.launch(topo="mesh", pnodes=totalProducerNodes,
if cluster.launch(topo="mesh", pnodes=totalProducerNodes, extraNodeosArgs=extraNodeosArgs,
totalNodes=totalNodes, totalProducers=totalProducerNodes, loadSystemContract=False,
activateIF=activateIF, biosFinalizer=False) is False:
Utils.cmdError("launcher")
Expand Down

0 comments on commit b118afd

Please sign in to comment.