Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge to stable NBS-4787, NBS-4748 #195

Merged
merged 3 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions cloud/blockstore/config/diagnostics.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,25 @@ message TVolumePerfSettings
optional uint32 CriticalFactor = 3;
};

////////////////////////////////////////////////////////////////////////////////
// Monitoring URL components

// Components from which links to monitoring dashboards are assembled.
// Every field is optional.
message TMonitoringUrlData
{
// Monitoring cluster name (e.g. yandexcloud_prod_vla).
// Substituted into the cluster parameter of the generated links.
optional string MonitoringClusterName = 1;
// Monitoring host URL; the generated links start with this value.
optional string MonitoringUrl = 2;
// Project name in monitoring; placed after "/projects/" in dashboard links.
optional string MonitoringProject = 3;

// Dashboard IDs used to form monitoring URLs. Each one is placed after
// "/dashboards/" in the corresponding link:
// - volume dashboard link;
// - partition dashboard link;
// - NBS alerts dashboard link;
// - NBS "overview to TV" dashboard link.
optional string MonitoringVolumeDashboard = 4;
optional string MonitoringPartitionDashboard = 5;
optional string MonitoringNBSAlertsDashboard = 6;
optional string MonitoringNBSTVDashboard = 7;
};

////////////////////////////////////////////////////////////////////////////////

message TDiagnosticsConfig
Expand All @@ -72,8 +91,9 @@ message TDiagnosticsConfig
// Kikimr monitoring port.
optional string KikimrMonitoringPort = 4;

// [obsolete]
// Name of cluster in Solomon.
optional string SolomonClusterName = 5;
// optional string SolomonClusterName = 5;

// Kikimr monitoring port.
optional uint32 KikimrMonPort = 6;
Expand All @@ -94,17 +114,19 @@ message TDiagnosticsConfig
// HDD disk performance threshold
// optional TVolumePerfThreshold HddPerfThreshold = 15;

// [obsolete]
// Solomon host url.
optional string SolomonUrl = 16;
// optional string SolomonUrl = 16;

// Allow destructive LWTrace actions.
optional bool UnsafeLWTrace = 17;

// Path to LWTrace query file.
optional string LWTraceDebugInitializationQuery = 18;

// [obsolete]
// Project name in solomon.
optional string SolomonProject = 20;
// optional string SolomonProject = 20;

// Sampling rate for request tracking
optional uint32 SamplingRate = 21;
Expand Down Expand Up @@ -178,4 +200,7 @@ message TDiagnosticsConfig

// Performance measurements coefficients for HDD NRD.
optional TVolumePerfSettings HddNonreplPerfSettings = 46;

// Monitoring data necessary for link generation on monpages.
optional TMonitoringUrlData MonitoringUrlData = 47;
}
33 changes: 30 additions & 3 deletions cloud/blockstore/libs/diagnostics/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ namespace {
xxx(HostNameScheme, NProto::EHostNameScheme, NProto::EHostNameScheme::HOSTNAME_RAW )\
xxx(BastionNameSuffix, TString, "" )\
xxx(ViewerHostName, TString, "" )\
xxx(SolomonClusterName, TString, "" )\
xxx(KikimrMonPort, ui32, 8765 )\
xxx(NbsMonPort, ui32, 8766 )\
\
Expand All @@ -26,8 +25,6 @@ namespace {
\
xxx(ProfileLogTimeThreshold, TDuration, TDuration::Seconds(15) )\
xxx(UseAsyncLogger, bool, false )\
xxx(SolomonUrl, TString, "" )\
xxx(SolomonProject, TString, "nbs" )\
xxx(UnsafeLWTrace, bool, false )\
xxx(LWTraceDebugInitializationQuery, TString, "" )\
xxx(SsdPerfSettings, TVolumePerfSettings, {} )\
Expand All @@ -40,6 +37,7 @@ namespace {
xxx(ExpectedIoParallelism, ui32, 32 )\
xxx(CloudIdsWithStrictSLA, TVector<TString>, {} )\
xxx(LWTraceShuttleCount, ui32, 2000 )\
xxx(MonitoringUrlData, TMonitoringUrlData, {} )\
\
xxx(CpuWaitFilename, TString, "/sys/fs/cgroup/cpu/system.slice/nbs.service/cpuacct.wait" )\
\
Expand Down Expand Up @@ -85,6 +83,14 @@ ConvertValue<TVolumePerfSettings, NProto::TVolumePerfSettings>(
return TVolumePerfSettings(value);
}

// Converts the protobuf representation of the monitoring URL settings into
// the in-memory TMonitoringUrlData structure.
template <>
TMonitoringUrlData
ConvertValue<TMonitoringUrlData, NProto::TMonitoringUrlData>(
    const NProto::TMonitoringUrlData& value)
{
    return TMonitoringUrlData{value};
}

template <>
TRequestThresholds
ConvertValue<TRequestThresholds, TProtoRequestThresholds>(
Expand Down Expand Up @@ -115,6 +121,11 @@ bool IsEmpty(const NProto::TVolumePerfSettings& t)
return t.ByteSizeLong() == 0;
}

// Reports whether the message would serialize to zero bytes.
bool IsEmpty(const NProto::TMonitoringUrlData& t)
{
    const auto serializedSize = t.ByteSizeLong();
    return serializedSize == 0;
}

template <typename T>
bool IsEmpty(const google::protobuf::RepeatedPtrField<T>& value)
{
Expand Down Expand Up @@ -253,6 +264,22 @@ void Out<NCloud::NBlockStore::TVolumePerfSettings>(
SerializeToTextFormat(v, out);
}

// Stream output for TMonitoringUrlData: copies every field into the protobuf
// message of the same name and writes that message in text format.
template <>
void Out<NCloud::NBlockStore::TMonitoringUrlData>(
    IOutputStream& out,
    const NCloud::NBlockStore::TMonitoringUrlData& value)
{
    using TProtoUrlData = NCloud::NBlockStore::NProto::TMonitoringUrlData;

    TProtoUrlData proto;

    // Host, project and cluster.
    proto.SetMonitoringClusterName(value.MonitoringClusterName);
    proto.SetMonitoringUrl(value.MonitoringUrl);
    proto.SetMonitoringProject(value.MonitoringProject);

    // Dashboard identifiers.
    proto.SetMonitoringVolumeDashboard(value.MonitoringVolumeDashboard);
    proto.SetMonitoringPartitionDashboard(value.MonitoringPartitionDashboard);
    proto.SetMonitoringNBSAlertsDashboard(value.MonitoringNBSAlertsDashboard);
    proto.SetMonitoringNBSTVDashboard(value.MonitoringNBSTVDashboard);

    SerializeToTextFormat(proto, out);
}

template <>
void Out<NCloud::TRequestThresholds>(
IOutputStream& out,
Expand Down
32 changes: 29 additions & 3 deletions cloud/blockstore/libs/diagnostics/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,34 @@ struct TVolumePerfSettings:

////////////////////////////////////////////////////////////////////////////////

// In-memory form of NProto::TMonitoringUrlData: the host, project, cluster and
// dashboard identifiers from which monitoring links are built.
struct TMonitoringUrlData: public TAtomicRefCount<TMonitoringUrlData>
{
    TString MonitoringClusterName;
    TString MonitoringUrl;
    // Never left empty by the constructors below: falls back to "nbs".
    TString MonitoringProject;
    TString MonitoringVolumeDashboard;
    TString MonitoringPartitionDashboard;
    TString MonitoringNBSAlertsDashboard;
    TString MonitoringNBSTVDashboard;

    // Default settings: everything empty except the project, which is "nbs".
    TMonitoringUrlData()
        : MonitoringProject("nbs")
    {}
    TMonitoringUrlData(const TMonitoringUrlData& rhs) = default;

    // Copies all fields from the protobuf message.
    //
    // A missing or empty project name falls back to "nbs", the same default
    // the parameterless constructor uses. Without the fallback, a config that
    // sets e.g. only the URL and the cluster would produce links with an empty
    // project segment.
    explicit TMonitoringUrlData(const NProto::TMonitoringUrlData& data)
        : MonitoringClusterName(data.GetMonitoringClusterName())
        , MonitoringUrl(data.GetMonitoringUrl())
        , MonitoringProject(
              data.GetMonitoringProject().empty()
                  ? TString("nbs")
                  : data.GetMonitoringProject())
        , MonitoringVolumeDashboard(data.GetMonitoringVolumeDashboard())
        , MonitoringPartitionDashboard(data.GetMonitoringPartitionDashboard())
        , MonitoringNBSAlertsDashboard(data.GetMonitoringNBSAlertsDashboard())
        , MonitoringNBSTVDashboard(data.GetMonitoringNBSTVDashboard())
    {}
};

////////////////////////////////////////////////////////////////////////////////

class TDiagnosticsConfig
{
private:
Expand All @@ -81,9 +109,6 @@ class TDiagnosticsConfig
NProto::EHostNameScheme GetHostNameScheme() const;
TString GetBastionNameSuffix() const;
TString GetViewerHostName() const;
TString GetSolomonClusterName() const;
TString GetSolomonUrl() const;
TString GetSolomonProject() const;
ui32 GetKikimrMonPort() const;
ui32 GetNbsMonPort() const;
ui32 GetSamplingRate() const;
Expand All @@ -106,6 +131,7 @@ class TDiagnosticsConfig
TVolumePerfSettings GetLocalSSDPerfSettings() const;
ui32 GetExpectedIoParallelism() const;
TVector<TString> GetCloudIdsWithStrictSLA() const;
TMonitoringUrlData GetMonitoringUrlData() const;

TString GetCpuWaitFilename() const;

Expand Down
85 changes: 38 additions & 47 deletions cloud/blockstore/libs/diagnostics/hostname.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,74 +72,65 @@ TString GetExternalHostUrl(
return out;
}

TString GetSolomonVolumeUrl(
TString GetMonitoringVolumeUrl(
const TDiagnosticsConfig& config,
const TString& diskId,
const TString& dashboard)
const TString& diskId)
{
TMonitoringUrlData data = config.GetMonitoringUrlData();
return TStringBuilder()
<< config.GetSolomonUrl()
<< "/?project=" << config.GetSolomonProject()
<< "&service=service_volume"
<< "&cluster="<< config.GetSolomonClusterName()
<< "&volume=" << diskId
<< "&dashboard=" << dashboard;
<< data.MonitoringUrl << "/projects/" << data.MonitoringProject
<< "/dashboards/" << data.MonitoringVolumeDashboard
<< "?from=now-1d&to=now&refresh=60000&p.cluster="
<< data.MonitoringClusterName << "&p.volume=" << diskId;
}

TString GetSolomonPartitionUrl(
const TDiagnosticsConfig& config,
const TString& dashboard)
TString GetMonitoringPartitionUrl(const TDiagnosticsConfig& config)
{
TMonitoringUrlData data = config.GetMonitoringUrlData();
return TStringBuilder()
<< config.GetSolomonUrl()
<< "/?project=" << config.GetSolomonProject()
<< "&service=tablets"
<< "&cluster=" << config.GetSolomonClusterName()
<< "&host=" << GetShortHostName()
<< "&dashboard=" << dashboard;
<< data.MonitoringUrl << "/projects/" << data.MonitoringProject
<< "/dashboards/" << data.MonitoringPartitionDashboard
<< "?from=now-1d&to=now&"
"refresh=60000&p.service=tablets&p.cluster="
<< data.MonitoringClusterName << "&p.host=" << GetShortHostName();
}

TString GetSolomonServerUrl(
const TDiagnosticsConfig& config,
const TString& dashboard)
TString GetMonitoringNBSAlertsUrl(const TDiagnosticsConfig& config)
{
TMonitoringUrlData data = config.GetMonitoringUrlData();
return TStringBuilder()
<< config.GetSolomonUrl()
<< "/?project" << config.GetSolomonProject()
<< "&service=server"
<< "&cluster=" << config.GetSolomonClusterName()
<< "&host=" << GetShortHostName()
<< "&type=-"
<< "&dashboard="<< dashboard;
<< data.MonitoringUrl << "/projects/" << data.MonitoringProject
<< "/dashboards/" << data.MonitoringNBSAlertsDashboard
<< "?from=now-1d&to=now&refresh=60000&p.cluster="
<< data.MonitoringClusterName << "&p.host=" << GetShortHostName();
}

TString GetSolomonClientUrl(
const TDiagnosticsConfig& config,
const TString& dashboard)
TString GetMonitoringNBSOverviewToTVUrl(const TDiagnosticsConfig& config)
{
TMonitoringUrlData data = config.GetMonitoringUrlData();
return TStringBuilder()
<< config.GetSolomonUrl()
<< "/?project=" << config.GetSolomonProject()
<< "&service=client"
<< "&cluster="<< config.GetSolomonClusterName()
<< "&host=" << GetShortHostName()
<< "&type=-"
<< "&dashboard=" << dashboard;
<< data.MonitoringUrl << "/projects/" << data.MonitoringProject
<< "/dashboards/" << data.MonitoringNBSTVDashboard
<< "?from=now-1d&to=now&refresh=60000&p.cluster="
<< data.MonitoringClusterName << "&p.host=cluster";
}

TString GetSolomonBsProxyUrl(
TString GetMonitoringYDBGroupUrl(
const TDiagnosticsConfig& config,
ui32 groupId,
const TString& dashboard)
const TString& storagePool)
{
TMonitoringUrlData data = config.GetMonitoringUrlData();
return TStringBuilder()
<< config.GetSolomonUrl()
<< "/?project" << config.GetSolomonProject()
<< "&service=dsproxy_percentile"
<< "&cluster=" << config.GetSolomonClusterName()
<< "&host=" << GetShortHostName()
<< "&blobstorageproxy=" << groupId
<< "&dashboard=" << dashboard;
<< data.MonitoringUrl
<< "/projects/kikimr/explorer/"
"queries?q.0.s=histogram_percentile(99, {project=\"kikimr"
<< "\", cluster=\"" << data.MonitoringClusterName
<< "\", storagePool=\"" << storagePool << "\", group=\"" << groupId
<< "\", host=\"*\", service=\"vdisks\", "
"subsystem=\"latency_histo\", "
"handleclass=\"GetFast\"})&q.0.name=A&from=now-1d&to=now&refresh="
"60000";
}

} // namespace NCloud::NBlockStore
21 changes: 7 additions & 14 deletions cloud/blockstore/libs/diagnostics/hostname.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,19 @@ TString GetExternalHostUrl(
EHostService serviceType,
const TDiagnosticsConfig& config);

TString GetSolomonServerUrl(
const TDiagnosticsConfig& config,
const TString& dashboard);
TString GetMonitoringNBSAlertsUrl(const TDiagnosticsConfig& config);

TString GetSolomonClientUrl(
const TDiagnosticsConfig& config,
const TString& dashboard);
TString GetMonitoringNBSOverviewToTVUrl(const TDiagnosticsConfig& config);

TString GetSolomonVolumeUrl(
TString GetMonitoringVolumeUrl(
const TDiagnosticsConfig& config,
const TString& diskId,
const TString& dashboard);
const TString& diskId);

TString GetSolomonPartitionUrl(
const TDiagnosticsConfig& config,
const TString& dashboard);
TString GetMonitoringPartitionUrl(const TDiagnosticsConfig& config);

TString GetSolomonBsProxyUrl(
TString GetMonitoringYDBGroupUrl(
const TDiagnosticsConfig& config,
ui32 groupId,
const TString& dashboard);
const TString& storagePool);

} // namespace NCloud::NBlockStore
10 changes: 5 additions & 5 deletions cloud/blockstore/libs/diagnostics/server_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,15 +601,15 @@ void TServerStats::OutputHtml(IOutputStream& out, const IMonHttpRequest& request
if (DiagnosticsConfig) {
TAG(TH3) {
out << "<a href='"
<< GetSolomonServerUrl(*DiagnosticsConfig, "nbs-server-monitoring")
<< "'>Server dashboards</a>";
<< GetMonitoringNBSAlertsUrl(*DiagnosticsConfig)
<< "'>NBS Alerts dashboard</a>";
};

TAG(TH3) {
out << "<a href='"
<< GetSolomonClientUrl(*DiagnosticsConfig, "nbs-compute-client-monitoring")
<< "'>Client dashboards</a>";
};
<< GetMonitoringNBSOverviewToTVUrl(*DiagnosticsConfig)
<< "'>NBS overview To TV</a>";
}
}

TAG(TH3) { out << "Config"; }
Expand Down
10 changes: 4 additions & 6 deletions cloud/blockstore/libs/storage/core/monitoring_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,28 +968,26 @@ void DumpCompactionMap(
}
}

void DumpSolomonVolumeLink(
void DumpMonitoringVolumeLink(
IOutputStream& out,
const TDiagnosticsConfig& config,
const TString& diskId)
{
HTML(out) {
TAG(TH3) {
out << "<a href='"
<< GetSolomonVolumeUrl(config, diskId, "nbs-volume-overview")
out << "<a href='" << GetMonitoringVolumeUrl(config, diskId)
<< "'>Volume dashboards</a>";
}
}
}

void DumpSolomonPartitionLink(
void DumpMonitoringPartitionLink(
IOutputStream& out,
const TDiagnosticsConfig& config)
{
HTML(out) {
TAG(TH3) {
out << "<a href='"
<< GetSolomonPartitionUrl(config, "nbs-tablets-transactions")
out << "<a href='" << GetMonitoringPartitionUrl(config)
<< "'>Partition dashboards</a>";
}
}
Expand Down
Loading
Loading