Skip to content

Commit

Permalink
merge to stable-23-3: issue-1394 issue-1395 issue-1474 (#1650)
Browse files Browse the repository at this point in the history
cherry-pick:

[Filestore] add SetNodeAttr command to filestore client #1469

issue 1394: optimize cleanup devices #1476

issue-1395: run cleaning process per device pool #1513

[Filestore] print response of create session command #1549

issue-1394: TryUpdateDevices refactoring #1578

update CMakeLists

ffc1468 fix unit tests

cherry-pick Support StoreResult to std::optional in LastGetopt
  • Loading branch information
antonmyagkov committed Jul 25, 2024
1 parent e7846e5 commit 03586de
Show file tree
Hide file tree
Showing 25 changed files with 777 additions and 106 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class TDiskRegistryActor final
bool UsersNotificationInProgress = false;
bool DiskStatesPublicationInProgress = false;
bool AutomaticallyReplacedDevicesDeletionInProgress = false;
bool SecureEraseInProgress = false;
THashSet<TString> SecureEraseInProgressPerPool;
bool StartMigrationInProgress = false;

TVector<TString> DisksBeingDestroyed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class TSecureEraseActor final
const TRequestInfoPtr Request;
const TDuration RequestTimeout;

const TString PoolName;
TVector<NProto::TDeviceConfig> Devices;
TVector<TString> CleanDevices;

Expand All @@ -34,6 +35,7 @@ class TSecureEraseActor final
const TActorId& owner,
TRequestInfoPtr request,
TDuration requestTimeout,
TString poolName,
TVector<NProto::TDeviceConfig> devicesToClean);

void Bootstrap(const TActorContext& ctx);
Expand Down Expand Up @@ -73,10 +75,12 @@ TSecureEraseActor::TSecureEraseActor(
const TActorId& owner,
TRequestInfoPtr request,
TDuration requestTimeout,
TString poolName,
TVector<NProto::TDeviceConfig> devicesToClean)
: Owner(owner)
, Request(std::move(request))
, RequestTimeout(requestTimeout)
, PoolName(std::move(poolName))
, Devices(std::move(devicesToClean))
{}

Expand Down Expand Up @@ -113,6 +117,7 @@ void TSecureEraseActor::ReplyAndDie(const TActorContext& ctx, NProto::TError err
{
auto response = std::make_unique<TEvDiskRegistryPrivate::TEvSecureEraseResponse>(
std::move(error),
PoolName,
CleanDevices.size());
NCloud::Reply(ctx, *Request, std::move(response));

Expand Down Expand Up @@ -273,16 +278,9 @@ void TDiskRegistryActor::ExecuteCleanupDevices(
TTransactionContext& tx,
TTxDiskRegistry::TCleanupDevices& args)
{
Y_UNUSED(ctx);

TDiskRegistryDatabase db(tx.DB);

for (const auto& uuid: args.Devices) {
auto diskId = State->MarkDeviceAsClean(ctx.Now(), db, uuid);
if (diskId) {
args.SyncDeallocatedDisks.push_back(std::move(diskId));
}
}
args.SyncDeallocatedDisks =
State->MarkDevicesAsClean(ctx.Now(), db, args.Devices);
}

void TDiskRegistryActor::CompleteCleanupDevices(
Expand All @@ -301,10 +299,6 @@ void TDiskRegistryActor::CompleteCleanupDevices(

void TDiskRegistryActor::SecureErase(const TActorContext& ctx)
{
if (SecureEraseInProgress) {
return;
}

auto dirtyDevices = State->GetDirtyDevices();
EraseIf(dirtyDevices, [&] (auto& d) {
if (d.GetState() == NProto::DEVICE_STATE_ERROR) {
Expand Down Expand Up @@ -374,30 +368,55 @@ void TDiskRegistryActor::SecureErase(const TActorContext& ctx)
countBeforeFiltration,
dirtyDevices.size());

SecureEraseInProgress = true;

auto request = std::make_unique<TEvDiskRegistryPrivate::TEvSecureEraseRequest>(
std::move(dirtyDevices),
Config->GetNonReplicatedSecureEraseTimeout());

auto deadline = Min(SecureEraseStartTs, ctx.Now()) + TDuration::Seconds(5);
if (deadline > ctx.Now()) {
LOG_INFO(ctx, TBlockStoreComponents::DISK_REGISTRY,
"[%lu] Scheduled secure erase, now: %lu, deadline: %lu",
TabletID(),
ctx.Now().MicroSeconds(),
deadline.MicroSeconds());

ctx.ExecutorThread.Schedule(
deadline,
new IEventHandle(ctx.SelfID, ctx.SelfID, request.get()));
request.release();
} else {
LOG_INFO(ctx, TBlockStoreComponents::DISK_REGISTRY,
"[%lu] Sending secure erase request",
TabletID());
auto it = dirtyDevices.begin();
while (it != dirtyDevices.end()) {
auto first = it;
const auto poolName = first->GetPoolName();
it = std::partition(
first,
dirtyDevices.end(),
[&poolName](const auto& device)
{ return poolName == device.GetPoolName(); });

auto [_, alreadyInProgress] =
SecureEraseInProgressPerPool.insert(poolName);
if (!alreadyInProgress) {
continue;
}

NCloud::Send(ctx, ctx.SelfID, std::move(request));
auto request =
std::make_unique<TEvDiskRegistryPrivate::TEvSecureEraseRequest>(
poolName,
TVector<NProto::TDeviceConfig>(
std::make_move_iterator(first),
std::make_move_iterator(it)),
Config->GetNonReplicatedSecureEraseTimeout());

auto deadline =
Min(SecureEraseStartTs, ctx.Now()) + TDuration::Seconds(5);
if (deadline > ctx.Now()) {
LOG_INFO(
ctx,
TBlockStoreComponents::DISK_REGISTRY,
"[%lu] Scheduled secure erase for pool: %s, now: %lu, "
"deadline: %lu",
TabletID(),
poolName.c_str(),
ctx.Now().MicroSeconds(),
deadline.MicroSeconds());

ctx.ExecutorThread.Schedule(
deadline,
new IEventHandle(ctx.SelfID, ctx.SelfID, request.release()));
} else {
LOG_INFO(
ctx,
TBlockStoreComponents::DISK_REGISTRY,
"[%lu] Sending secure erase request",
TabletID());

NCloud::Send(ctx, ctx.SelfID, std::move(request));
}
}
}

Expand Down Expand Up @@ -425,6 +444,7 @@ void TDiskRegistryActor::HandleSecureErase(
msg->CallContext
),
msg->RequestTimeout,
msg->PoolName,
std::move(msg->DirtyDevices));
Actors.insert(actor);
}
Expand All @@ -440,7 +460,7 @@ void TDiskRegistryActor::HandleSecureEraseResponse(
TabletID(),
msg->CleanDevices);

SecureEraseInProgress = false;
SecureEraseInProgressPerPool.erase(msg->PoolName);
SecureErase(ctx);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ void TDiskRegistryActor::CompleteWritableState(
DisksNotificationInProgress = false;
UsersNotificationInProgress = false;
DiskStatesPublicationInProgress = false;
SecureEraseInProgress = false;
SecureEraseInProgressPerPool.clear();
StartMigrationInProgress = false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,25 +267,32 @@ struct TEvDiskRegistryPrivate

struct TSecureEraseRequest
{
TString PoolName;
TVector<NProto::TDeviceConfig> DirtyDevices;
TDuration RequestTimeout;

explicit TSecureEraseRequest(
TSecureEraseRequest(
TString poolName,
TVector<NProto::TDeviceConfig> dirtyDevices,
TDuration requestTimeout)
: DirtyDevices(std::move(dirtyDevices))
: PoolName(std::move(poolName))
, DirtyDevices(std::move(dirtyDevices))
, RequestTimeout(requestTimeout)
{}
};

struct TSecureEraseResponse
{
TString PoolName;
size_t CleanDevices = 0;

TSecureEraseResponse() = default;

explicit TSecureEraseResponse(size_t cleanDevices)
: CleanDevices(cleanDevices)
TSecureEraseResponse(
TString poolName,
size_t cleanDevices)
: PoolName(std::move(poolName))
, CleanDevices(cleanDevices)
{}
};

Expand Down
67 changes: 51 additions & 16 deletions cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3601,41 +3601,76 @@ bool TDiskRegistryState::MarkDeviceAsDirty(
return true;
}

TString TDiskRegistryState::MarkDeviceAsClean(
TDiskRegistryState::TDiskId TDiskRegistryState::MarkDeviceAsClean(
TInstant now,
TDiskRegistryDatabase& db,
const TDeviceId& uuid)
{
DeviceList.MarkDeviceAsClean(uuid);
db.DeleteDirtyDevice(uuid);
auto ret = MarkDevicesAsClean(now, db, TVector<TDeviceId>{uuid});
return ret.empty() ? "" : ret[0];
}

if (!DeviceList.IsSuspendedDevice(uuid)) {
db.DeleteSuspendedDevice(uuid);
TVector<TDiskRegistryState::TDiskId> TDiskRegistryState::MarkDevicesAsClean(
TInstant now,
TDiskRegistryDatabase& db,
const TVector<TDeviceId>& uuids)
{
for (const auto& uuid: uuids) {
DeviceList.MarkDeviceAsClean(uuid);
db.DeleteDirtyDevice(uuid);

if (!DeviceList.IsSuspendedDevice(uuid)) {
db.DeleteSuspendedDevice(uuid);
}
}

TryUpdateDevice(now, db, uuid);
TVector<TDiskId> ret;
for (const auto& uuid: TryUpdateDevices(now, db, uuids)) {
if (auto diskId = PendingCleanup.EraseDevice(uuid); !diskId.empty()) {
ret.push_back(std::move(diskId));
}
}

return PendingCleanup.EraseDevice(uuid);
return ret;
}

bool TDiskRegistryState::TryUpdateDevice(
TInstant now,
TDiskRegistryDatabase& db,
const TDeviceId& uuid)
{
Y_UNUSED(now);
return !TryUpdateDevices(now, db, {uuid}).empty();
}

auto [agent, device] = FindDeviceLocation(uuid);
if (!agent || !device) {
return false;
}
TVector<TDiskRegistryState::TDeviceId> TDiskRegistryState::TryUpdateDevices(
TInstant now,
TDiskRegistryDatabase& db,
const TVector<TDeviceId>& uuids)
{
TVector<TDeviceId> ret;
ret.reserve(uuids.size());

AdjustDeviceIfNeeded(*device, {});
TSet<TAgentId> agentsSet;
for (const auto& uuid: uuids) {
auto [agent, device] = FindDeviceLocation(uuid);
if (!agent || !device) {
continue;
}
ret.push_back(uuid);
agentsSet.emplace(agent->GetAgentId());
AdjustDeviceIfNeeded(*device, now);
}

UpdateAgent(db, *agent);
DeviceList.UpdateDevices(*agent, DevicePoolConfigs);
for (const auto& agentId: agentsSet) {
auto* agent = AgentList.FindAgent(agentId);
if (!agent) {
continue;
}
UpdateAgent(db, *agent);
DeviceList.UpdateDevices(*agent, DevicePoolConfigs);
}

return true;
return ret;
}

TVector<TString> TDiskRegistryState::CollectBrokenDevices(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,10 +493,22 @@ class TDiskRegistryState
TVector<NProto::TDeviceConfig> GetBrokenDevices() const;

TVector<NProto::TDeviceConfig> GetDirtyDevices() const;
TString MarkDeviceAsClean(

/// Mark a single device as clean.
/// Convenience wrapper: forwards to MarkDevicesAsClean() with a one-element
/// list, so the device goes through the same dirty/suspended/pending-cleanup
/// bookkeeping as in the batch variant.
/// @param now  timestamp forwarded to MarkDevicesAsClean()
/// @param db   disk registry database the changes are written to
/// @param uuid id of the device to mark as clean
/// @return the first disk id reported by MarkDevicesAsClean(), or an empty
///         string if it reported none
TDiskId MarkDeviceAsClean(
TInstant now,
TDiskRegistryDatabase& db,
const TDeviceId& uuid);

/// Mark the selected devices as clean.
/// For every uuid: marks the device as clean in DeviceList, deletes its
/// dirty-device record from the database, and deletes its suspended-device
/// record unless DeviceList still reports the device as suspended.
/// The whole list is then passed to TryUpdateDevices(), and every device it
/// returns is erased from PendingCleanup.
/// @param now   timestamp forwarded to TryUpdateDevices()
/// @param db    disk registry database the changes are written to
/// @param uuids ids of the devices to mark as clean
/// @return the non-empty disk ids produced by PendingCleanup.EraseDevice(),
///         one per erased device
/// NOTE(review): a uuid that TryUpdateDevices() skips (no agent/device found)
/// is not erased from PendingCleanup here - confirm this is intended.
TVector<TDiskId> MarkDevicesAsClean(
TInstant now,
TDiskRegistryDatabase& db,
const TVector<TDeviceId>& uuids);
bool MarkDeviceAsDirty(TDiskRegistryDatabase& db, const TDeviceId& uuid);

NProto::TError CreatePlacementGroup(
Expand Down Expand Up @@ -1123,11 +1135,22 @@ class TDiskRegistryState
TDiskRegistryDatabase& db,
const TString& diskId);

/// Try to update the configuration of the selected device and of its agent
/// in the disk registry database.
/// Delegates to TryUpdateDevices() with a one-element list.
/// @param now  timestamp forwarded to TryUpdateDevices()
/// @param db   disk registry database the changes are written to
/// @param uuid id of the device to update
/// @return true if TryUpdateDevices() returned the device (i.e. its location
///         was found); false otherwise
bool TryUpdateDevice(
TInstant now,
TDiskRegistryDatabase& db,
const TDeviceId& uuid);

/// Try to update the configuration of the selected devices and of their
/// agents in the disk registry database.
/// Devices for which no agent/device location can be found are skipped.
/// The ids of the affected agents are collected first, and each of those
/// agents is then written to the database and re-registered in DeviceList,
/// instead of doing that once per device.
/// @param now   timestamp passed to AdjustDeviceIfNeeded() for each device
/// @param db    disk registry database the changes are written to
/// @param uuids ids of the devices to update
/// @return ids of the devices whose location was found, in input order
TVector<TDeviceId> TryUpdateDevices(
TInstant now,
TDiskRegistryDatabase& db,
const TVector<TDeviceId>& uuids);

TDeviceList::TAllocationQuery MakeMigrationQuery(
const TDiskId& sourceDiskId,
const NProto::TDeviceConfig& sourceDevice);
Expand Down
Loading

0 comments on commit 03586de

Please sign in to comment.