Skip to content

Commit

Permalink
Ability to disable node broker registration in the DA (#2038)
Browse files Browse the repository at this point in the history
* Ability to disable node broker registration in the DA

* Fix typo in comment

* Fix linkage
  • Loading branch information
komarevtsev-d authored Sep 16, 2024
1 parent 700f92a commit 37ab38e
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 38 deletions.
2 changes: 1 addition & 1 deletion cloud/blockstore/apps/disk_agent/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ PEERDIR(
library/cpp/getopt
)

IF (BUILD_TYPE != "PROFILE")
IF (BUILD_TYPE != "PROFILE" AND BUILD_TYPE != "DEBUG")
SPLIT_DWARF()
ENDIF()

Expand Down
9 changes: 9 additions & 0 deletions cloud/blockstore/config/disk.proto
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,15 @@ message TDiskAgentConfig

// Offload the parsing of all IO requests (by default offloads only write requests).
optional bool OffloadAllIORequestsParsingEnabled = 31;

// When enabled, the Disk Agent checks that devices were found either in
// the "CachedConfigPath", in the "FileDevices", or with the
// "StorageDiscoveryConfig". If none were found, the process falls into a
// deep idle state without even registering in the NodeBroker.
// WARNING: CMS configs can only be retrieved after registering in the
// NodeBroker. Enabling this option completely disables them if no devices
// were found.
optional bool DisableNodeBrokerRegisterationOnDevicelessAgent = 32;
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
61 changes: 59 additions & 2 deletions cloud/blockstore/libs/disk_agent/bootstrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <cloud/blockstore/libs/storage/core/config.h>
#include <cloud/blockstore/libs/storage/core/probes.h>
#include <cloud/blockstore/libs/storage/disk_agent/model/config.h>
#include <cloud/blockstore/libs/storage/disk_agent/model/device_generator.h>
#include <cloud/blockstore/libs/storage/disk_agent/model/device_scanner.h>
#include <cloud/blockstore/libs/storage/disk_agent/model/probes.h>
#include <cloud/blockstore/libs/storage/disk_registry_proxy/model/config.h>
#include <cloud/blockstore/libs/storage/init/disk_agent/actorsystem.h>
Expand Down Expand Up @@ -92,6 +94,33 @@ void ParseProtoTextFromFile(const TString& fileName, T& dst)
ParseFromTextFormat(in, dst);
}

// Tells whether the agent has at least one device to work with.
//
// The sources are probed in this order, stopping at the first hit:
//   1. file devices listed directly in the agent config;
//   2. file devices stored in the cached config (the agent-level cache path
//      is preferred, the storage-level one is the fallback);
//   3. devices found by scanning with the storage discovery config.
//
// A failed discovery scan is reported as "no devices".
bool AgentHasDevices(
    TLog log,
    const NStorage::TStorageConfigPtr& storageConfig,
    const NStorage::TDiskAgentConfigPtr& agentConfig)
{
    const bool hasConfiguredDevices = !agentConfig->GetFileDevices().empty();
    if (hasConfiguredDevices) {
        return true;
    }

    const TString storageCachePath =
        storageConfig->GetCachedDiskAgentConfigPath();
    const TString agentCachePath = agentConfig->GetCachedConfigPath();
    const TString& cachePath =
        agentCachePath.empty() ? storageCachePath : agentCachePath;

    if (!NStorage::LoadCachedConfig(cachePath).empty()) {
        return true;
    }

    // The generator collects every device reported by the scan.
    NStorage::TDeviceGenerator generator{
        std::move(log),
        agentConfig->GetAgentId()};
    const auto scanError = FindDevices(
        agentConfig->GetStorageDiscoveryConfig(),
        std::ref(generator));

    // The collected result is only extracted when the scan succeeded.
    return !HasError(scanError) && !generator.ExtractResult().empty();
}

////////////////////////////////////////////////////////////////////////////////

class TLoggingProxy final
Expand Down Expand Up @@ -204,8 +233,11 @@ void TBootstrap::Init()
Timer = CreateWallClockTimer();
Scheduler = CreateScheduler();

InitKikimrService();
if (!InitKikimrService()) {
return;
}

Initialized = true;
STORAGE_INFO("Kikimr service initialized");

auto diagnosticsConfig = Configs->DiagnosticsConfig;
Expand Down Expand Up @@ -281,7 +313,7 @@ void TBootstrap::InitRdmaServer(NRdma::TRdmaConfig& config)
}
}

void TBootstrap::InitKikimrService()
bool TBootstrap::InitKikimrService()
{
Configs->InitKikimrConfig();
Configs->InitServerConfig();
Expand Down Expand Up @@ -328,6 +360,23 @@ void TBootstrap::InitKikimrService()

STORAGE_INFO("Configs initialized");

if (const auto& agentConfig = Configs->DiskAgentConfig;
agentConfig->GetDisableNodeBrokerRegisterationOnDevicelessAgent())
{
if (!agentConfig->GetEnabled()) {
STORAGE_INFO(
"Agent is disabled. Skipping the node broker registration.");
return false;
}

if (!AgentHasDevices(Log, Configs->StorageConfig, agentConfig)) {
STORAGE_INFO(
"Devices were not found. Skipping the node broker "
"registration.");
return false;
}
}

auto [nodeId, scopeId, cmsConfig] = RegisterDynamicNode(
Configs->KikimrConfig,
registerOpts,
Expand Down Expand Up @@ -450,6 +499,8 @@ void TBootstrap::InitKikimrService()
if (SpdkLogInitializer) {
SpdkLogInitializer(spdkLog);
}

return true;
}

void TBootstrap::InitLWTrace()
Expand Down Expand Up @@ -522,6 +573,9 @@ void TBootstrap::InitLWTrace()

void TBootstrap::Start()
{
if (!Initialized) {
return;
}
#define START_COMPONENT(c) \
if (c) { \
c->Start(); \
Expand Down Expand Up @@ -556,6 +610,9 @@ void TBootstrap::Start()

void TBootstrap::Stop()
{
if (!Initialized) {
return;
}
#define STOP_COMPONENT(c) \
if (c) { \
c->Stop(); \
Expand Down
4 changes: 3 additions & 1 deletion cloud/blockstore/libs/disk_agent/bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class TBootstrap
TProgramShouldContinue ShouldContinue;
TVector<TString> PostponedCriticalEvents;

bool Initialized = false;

public:
TBootstrap(
std::shared_ptr<NKikimr::TModuleFactories> moduleFactories,
Expand All @@ -91,7 +93,7 @@ class TBootstrap
private:
void InitLWTrace();
void InitProfileLog();
void InitKikimrService();
bool InitKikimrService();

void InitRdmaServer(NRdma::TRdmaConfig& config);
};
Expand Down
1 change: 1 addition & 0 deletions cloud/blockstore/libs/storage/disk_agent/model/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace {
xxx(TemporaryAgent, bool, false )\
xxx(IOParserActorCount, ui32, 0 )\
xxx(OffloadAllIORequestsParsingEnabled, bool, false )\
xxx(DisableNodeBrokerRegisterationOnDevicelessAgent, bool, false )\
// BLOCKSTORE_AGENT_CONFIG

#define BLOCKSTORE_DECLARE_CONFIG(name, type, value) \
Expand Down
1 change: 1 addition & 0 deletions cloud/blockstore/libs/storage/disk_agent/model/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class TDiskAgentConfig

ui32 GetIOParserActorCount() const;
bool GetOffloadAllIORequestsParsingEnabled() const;
bool GetDisableNodeBrokerRegisterationOnDevicelessAgent() const;

void Dump(IOutputStream& out) const;
void DumpHtml(IOutputStream& out) const;
Expand Down
26 changes: 24 additions & 2 deletions cloud/blockstore/libs/storage/disk_agent/model/device_scanner.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#include "device_scanner.h"

#include <cloud/storage/core/libs/diagnostics/logging.h>

#include <cloud/blockstore/libs/storage/core/config.h>
#include <cloud/blockstore/libs/storage/disk_agent/model/config.h>
#include <cloud/storage/core/libs/common/proto_helpers.h>
#include <cloud/storage/core/libs/diagnostics/logging.h>

#include <util/generic/algorithm.h>
#include <util/string/builder.h>
#include <util/system/file.h>
#include <util/system/fs.h>

#include <sys/stat.h>

Expand Down Expand Up @@ -160,4 +162,24 @@ NProto::TError FindDevices(
return {};
}

// Reads the cached Disk Agent config stored at |path| and returns the file
// devices it lists. Yields an empty list when |path| is empty or when no file
// exists at that location.
TVector<NProto::TFileDeviceArgs> LoadCachedConfig(const TString& path)
{
    const bool cacheAvailable = !path.empty() && NFs::Exists(path);
    if (!cacheAvailable) {
        return {};
    }

    NProto::TDiskAgentConfig cachedConfig;
    ParseProtoTextFromFileRobust(path, cachedConfig);

    // Move the parsed entries out of the proto rather than copying them.
    auto* fileDevices = cachedConfig.MutableFileDevices();
    TVector<NProto::TFileDeviceArgs> result(
        std::make_move_iterator(fileDevices->begin()),
        std::make_move_iterator(fileDevices->end()));
    return result;
}

} // namespace NCloud::NBlockStore::NStorage
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "public.h"

#include <cloud/blockstore/config/disk.pb.h>
#include <cloud/blockstore/libs/storage/core/public.h>
#include <cloud/storage/core/libs/common/error.h>

#include <functional>
Expand All @@ -13,7 +14,7 @@ namespace NCloud::NBlockStore::NStorage {

////////////////////////////////////////////////////////////////////////////////

using TDeviceCallback = std::function <NProto::TError (
using TDeviceCallback = std::function<NProto::TError(
const TString& path,
const NProto::TStorageDiscoveryConfig::TPoolConfig& poolConfig,
ui32 deviceNumber,
Expand All @@ -25,4 +26,6 @@ NProto::TError FindDevices(
const NProto::TStorageDiscoveryConfig& config,
TDeviceCallback callback);

// Loads the file devices from the cached Disk Agent config stored at |path|.
// Returns an empty vector if |path| is empty or the file does not exist.
TVector<NProto::TFileDeviceArgs> LoadCachedConfig(const TString& path);

} // namespace NCloud::NBlockStore::NStorage
31 changes: 4 additions & 27 deletions cloud/blockstore/libs/storage/disk_agent/storage_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ class TInitializer
bool ValidateStorageDiscoveryConfig() const;
void ValidateCurrentConfigs();

TVector<NProto::TFileDeviceArgs> LoadCachedConfig() const;
void SaveCurrentConfig();

void ReportDiskAgentConfigMismatchEvent(const TString& error);
Expand Down Expand Up @@ -349,31 +348,6 @@ void TInitializer::ScanFileDevices()
}
}

// Loads the file devices from the cached Disk Agent config. The cache path
// from the agent config is preferred; the one from the storage config is used
// as the fallback. Yields an empty list when the resolved path is empty or
// when no file exists at that location.
TVector<NProto::TFileDeviceArgs> TInitializer::LoadCachedConfig() const
{
    const TString storageCachePath =
        StorageConfig->GetCachedDiskAgentConfigPath();
    const TString agentCachePath = AgentConfig->GetCachedConfigPath();
    const TString& cachePath =
        agentCachePath.empty() ? storageCachePath : agentCachePath;

    if (cachePath.empty() || !NFs::Exists(cachePath)) {
        return {};
    }

    NProto::TDiskAgentConfig cachedConfig;
    ParseProtoTextFromFileRobust(cachePath, cachedConfig);

    // Move the parsed entries out of the proto rather than copying them.
    auto* fileDevices = cachedConfig.MutableFileDevices();
    TVector<NProto::TFileDeviceArgs> result(
        std::make_move_iterator(fileDevices->begin()),
        std::make_move_iterator(fileDevices->end()));
    return result;
}

void TInitializer::SaveCurrentConfig()
{
const auto path = AgentConfig->GetCachedConfigPath();
Expand Down Expand Up @@ -406,7 +380,10 @@ void TInitializer::SaveCurrentConfig()

void TInitializer::ValidateCurrentConfigs()
{
auto cachedDevices = LoadCachedConfig();
const TString storagePath = StorageConfig->GetCachedDiskAgentConfigPath();
const TString diskAgentPath = AgentConfig->GetCachedConfigPath();
const TString& path = diskAgentPath.empty() ? storagePath : diskAgentPath;
auto cachedDevices = LoadCachedConfig(path);
if (cachedDevices.empty()) {
STORAGE_INFO("There is no cached config");
SaveCurrentConfig();
Expand Down
37 changes: 35 additions & 2 deletions cloud/blockstore/tests/disk_agent_config/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

import yatest.common as yatest_common

from contrib.ydb.tests.library.harness.kikimr_runner import get_unique_path_for_current_test, \
ensure_path_exists
from contrib.ydb.tests.library.harness.kikimr_runner import \
get_unique_path_for_current_test, ensure_path_exists


DEVICE_SIZE = 1024 ** 3 # 1 GiB
Expand Down Expand Up @@ -274,3 +274,36 @@ def test_null_backend(nbs, agent_ids, disk_agent_configurators):
blocks = session.read_blocks(0, 1, checkpoint_id="")
assert len(blocks) == 1
session.unmount_volume()


def test_disable_node_broker_registration(nbs, agent_ids, disk_agent_configurators):
    """Check the DisableNodeBrokerRegisterationOnDevicelessAgent option.

    Two agents are configured so that storage discovery matches no paths.
    The first one has the option enabled and is expected to log that it skips
    the node broker registration; the second one has the option disabled and
    must not log that message.
    """
    assert len(disk_agent_configurators) >= 2

    skip_message = \
        "Devices were not found. Skipping the node broker registration."

    # The first agent will not register in the node broker.
    # The second agent should register, even without devices.
    for configurator, disable_registration in zip(
            disk_agent_configurators, (True, False)):
        agent_config = configurator.files["disk-agent"]
        agent_config.StorageDiscoveryConfig.PathConfigs[0].PathRegExp = \
            "unknown_path"
        agent_config.DisableNodeBrokerRegisterationOnDevicelessAgent = \
            disable_registration

    agents = []
    try:
        for agent_id, configurator in zip(agent_ids, disk_agent_configurators):
            # An agent with the option enabled is started without waiting
            # for its startup to complete.
            wait_for_start = not configurator.files["disk-agent"]\
                .DisableNodeBrokerRegisterationOnDevicelessAgent
            agents.append(start_disk_agent(
                configurator,
                name=agent_id,
                wait_for_start=wait_for_start))

        # NOTE(review): the log of the non-waited agent is read without any
        # explicit synchronisation; presumably the startup wait of the next
        # agent gives it enough time to write the message -- confirm.
        for agent, configurator in zip(agents, disk_agent_configurators):
            with open(agent.stderr_file_name) as log_file:
                deep_idle_agent = skip_message in log_file.read()
            assert deep_idle_agent == configurator.files["disk-agent"]\
                .DisableNodeBrokerRegisterationOnDevicelessAgent
    finally:
        # Kill every agent that was started, even if a later start or an
        # assertion above failed, so no process outlives the test.
        for agent in agents:
            agent.kill()
4 changes: 2 additions & 2 deletions cloud/blockstore/tests/python/lib/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def start_nbs(config: NbsConfigurator, name='nbs-server'):
return nbs


def start_disk_agent(config: NbsConfigurator, name='disk-agent'):
def start_disk_agent(config: NbsConfigurator, name='disk-agent', wait_for_start=True):
exe_path = yatest_common.binary_path("cloud/blockstore/apps/disk_agent/diskagentd")

cwd = get_unique_path_for_current_test(
Expand All @@ -190,7 +190,7 @@ def start_disk_agent(config: NbsConfigurator, name='disk-agent'):
commands = [exe_path] + config.params

agent = DiskAgent(
mon_port=config.mon_port,
mon_port=(config.mon_port if wait_for_start else None),
server_port=config.server_port,
commands=[commands],
cwd=cwd,
Expand Down

0 comments on commit 37ab38e

Please sign in to comment.