Skip to content

Commit

Permalink
[xds_override_host] improve logging for debuggability
Browse files Browse the repository at this point in the history
  • Loading branch information
markdroth committed Aug 31, 2023
1 parent 6412412 commit edcc61a
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h"

#include <inttypes.h>
#include <stddef.h>

#include <algorithm>
Expand Down Expand Up @@ -430,7 +431,10 @@ void XdsOverrideHostLb::ResetBackoffLocked() {

absl::Status XdsOverrideHostLb::UpdateLocked(UpdateArgs args) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] Received update", this);
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] Received update with %" PRIuPTR
" addresses",
this, args.addresses.ok() ? args.addresses->size() : 0);
}
auto old_config = std::move(config_);
// Update config.
Expand Down Expand Up @@ -498,27 +502,52 @@ OrphanablePtr<LoadBalancingPolicy> XdsOverrideHostLb::CreateChildPolicyLocked(
absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
absl::StatusOr<ServerAddressList> addresses) {
if (!addresses.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", this,
addresses.status().ToString().c_str());
}
return addresses;
}
ServerAddressList return_value;
std::map<const std::string, XdsHealthStatus> addresses_for_map;
for (const auto& address : *addresses) {
XdsHealthStatus status = GetAddressHealthStatus(address);
if (status.status() != XdsHealthStatus::kDraining) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: not draining, "
"passing to child",
this, address.ToString().c_str());
}
return_value.push_back(address);
} else if (!config_->override_host_status_set().Contains(status)) {
// Skip draining hosts if not in the override status set.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: draining but not in "
"override_host_status set -- ignoring",
this, address.ToString().c_str());
}
continue;
}
auto key = grpc_sockaddr_to_uri(&address.address());
if (key.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: adding map key %s", this,
address.ToString().c_str(), key->c_str());
}
addresses_for_map.emplace(std::move(*key), status);
}
}
{
MutexLock lock(&subchannel_map_mu_);
for (auto it = subchannel_map_.begin(); it != subchannel_map_.end();) {
if (addresses_for_map.find(it->first) == addresses_for_map.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] removing map key %s",
this, it->first.c_str());
}
it = subchannel_map_.erase(it);
} else {
++it;
Expand All @@ -527,10 +556,20 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
for (const auto& key_status : addresses_for_map) {
auto it = subchannel_map_.find(key_status.first);
if (it == subchannel_map_.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", this,
key_status.first.c_str());
}
subchannel_map_.emplace(std::piecewise_construct,
std::forward_as_tuple(key_status.first),
std::forward_as_tuple(key_status.second));
} else {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] setting EDS health status for "
"%s to %s",
this, key_status.first.c_str(), key_status.second.ToString());
}
it->second.SetEdsHealthStatus(key_status.second);
}
}
Expand Down
36 changes: 34 additions & 2 deletions test/core/client_channel/lb_policy/xds_override_host_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <grpc/grpc.h>
#include <grpc/support/json.h>
#include <grpc/support/log.h>

#include "src/core/ext/filters/stateful_session/stateful_session_filter.h"
#include "src/core/ext/xds/xds_health_status.h"
Expand Down Expand Up @@ -209,18 +210,31 @@ TEST_F(XdsOverrideHostTest, FailedSubchannelIsNotPicked) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Subchannel for address 1 becomes disconnected.
gpr_log(GPR_INFO, "### subchannel 1 reporting IDLE");
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
gpr_log(GPR_INFO,
"### expecting RR picks to exclude the disconnected subchannel");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// It starts trying to reconnect...
gpr_log(GPR_INFO, "### subchannel 1 reporting CONNECTING");
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
gpr_log(GPR_INFO, "### expecting RR picks again");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// ...but the connection attempt fails.
gpr_log(GPR_INFO, "### subchannel 1 reporting TRANSIENT_FAILURE");
subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE,
absl::ResourceExhaustedError("Hmmmm"));
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
// The host override is not used.
gpr_log(GPR_INFO, "### checking that host override is not used");
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]},
MakeOverrideHostAttribute(kAddresses[1]));
Expand Down Expand Up @@ -292,30 +306,48 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Send an update that marks the endpoints with different EDS health
// states, but those states are present in override_host_status.
// The picker should use the DRAINING host when a call's override
// points to that hose, but the host should not be used if there is no
// override pointing to it.
gpr_log(GPR_INFO, "### sending update with DRAINING host");
ApplyUpdateWithHealthStatuses(
{{kAddresses[0], XdsHealthStatus::HealthStatus::kUnknown},
{kAddresses[1], XdsHealthStatus::HealthStatus::kDraining},
{kAddresses[2], XdsHealthStatus::HealthStatus::kHealthy}},
{"UNKNOWN", "HEALTHY", "DRAINING"});
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
// There are two notifications - one from child policy and one from the parent
// policy due to draining channel update
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// Now the connection to the draining host gets dropped.
// The picker should queue picks where the override host is IDLE.
// All picks without an override host should not use this host.
gpr_log(GPR_INFO, "### closing connection to DRAINING host");
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel should have been asked to reconnect as a result of the
// queued pick above. It will therefore transition into state CONNECTING.
// The pick behavior is the same as above: The picker should queue
// picks where the override host is CONNECTING. All picks without an
// override host should not use this host.
gpr_log(GPR_INFO, "### subchannel starts reconnecting");
EXPECT_TRUE(subchannel->ConnectionRequested());
ExpectQueueEmpty();
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel now becomes connected again.
// Now picks with this override host can be completed again.
// Picks without an override host still don't use the draining host.
gpr_log(GPR_INFO, "### subchannel becomes reconnected");
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),
Expand Down

0 comments on commit edcc61a

Please sign in to comment.