From a478d504d5c793f333ab2b27fff6e820d627fc21 Mon Sep 17 00:00:00 2001 From: Anton Myagkov Date: Mon, 22 Jul 2024 12:03:06 +0200 Subject: [PATCH] issue-1395: run cleaning process per device pool (#1513) Issue: #1395 Problem: Erasing of hdd disks is significantly slower compare to erasing of nvme disks. Disk registry blocks next erase by setting SecureEraseInProgress flag while current erase is in progress. As a result we can delay erase of the next bunch of nvme disks despite of no nvme disks erase is in progress. SecureErase is a background process which is regularly triggered by disk registry actor. Solution: Split SecureErase by pool name Use different SecureEraseInProgress flags depend on the pool name. --------- Co-authored-by: Pavel Misko --- .../disk_registry/disk_registry_actor.h | 2 +- .../disk_registry_actor_secure_erase.cpp | 83 ++++-- .../disk_registry_actor_writable_state.cpp | 2 +- .../disk_registry/disk_registry_private.h | 15 +- .../disk_registry_ut_lifecycle.cpp | 257 ++++++++++++++++++ .../storage/disk_registry/testlib/test_env.h | 1 + 6 files changed, 326 insertions(+), 34 deletions(-) diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h index 6b50bb12c03..9fc073260f9 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h @@ -90,7 +90,7 @@ class TDiskRegistryActor final bool UsersNotificationInProgress = false; bool DiskStatesPublicationInProgress = false; bool AutomaticallyReplacedDevicesDeletionInProgress = false; - bool SecureEraseInProgress = false; + THashSet SecureEraseInProgressPerPool; bool StartMigrationInProgress = false; TVector DisksBeingDestroyed; diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_secure_erase.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_secure_erase.cpp index 251df41693c..229c6d5628f 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_secure_erase.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_secure_erase.cpp @@ -24,6 +24,7 @@ class TSecureEraseActor final const TRequestInfoPtr Request; const TDuration RequestTimeout; + const TString PoolName; TVector Devices; TVector CleanDevices; @@ -34,6 +35,7 @@ class TSecureEraseActor final const TActorId& owner, TRequestInfoPtr request, TDuration requestTimeout, + TString poolName, TVector devicesToClean); void Bootstrap(const TActorContext& ctx); @@ -73,10 +75,12 @@ TSecureEraseActor::TSecureEraseActor( const TActorId& owner, TRequestInfoPtr request, TDuration requestTimeout, + TString poolName, TVector devicesToClean) : Owner(owner) , Request(std::move(request)) , RequestTimeout(requestTimeout) + , PoolName(std::move(poolName)) , Devices(std::move(devicesToClean)) {} @@ -113,6 +117,7 @@ void TSecureEraseActor::ReplyAndDie(const TActorContext& ctx, NProto::TError err { auto response = std::make_unique( std::move(error), + PoolName, CleanDevices.size()); NCloud::Reply(ctx, *Request, std::move(response)); @@ -301,10 +306,6 @@ void TDiskRegistryActor::CompleteCleanupDevices( void TDiskRegistryActor::SecureErase(const TActorContext& ctx) { - if (SecureEraseInProgress) { - return; - } - auto dirtyDevices = State->GetDirtyDevices(); EraseIf(dirtyDevices, [&] (auto& d) { if (d.GetState() == NProto::DEVICE_STATE_ERROR) { @@ -374,30 +375,55 @@ void TDiskRegistryActor::SecureErase(const TActorContext& ctx) countBeforeFiltration, dirtyDevices.size()); - SecureEraseInProgress = true; - - auto request = std::make_unique( - std::move(dirtyDevices), - Config->GetNonReplicatedSecureEraseTimeout()); - - auto deadline = Min(SecureEraseStartTs, ctx.Now()) + TDuration::Seconds(5); - if (deadline > ctx.Now()) { - LOG_INFO(ctx, TBlockStoreComponents::DISK_REGISTRY, - "[%lu] Scheduled secure erase, now: %lu, deadline: %lu", - TabletID(), - ctx.Now().MicroSeconds(), - deadline.MicroSeconds()); - - ctx.ExecutorThread.Schedule( - deadline, - new IEventHandle(ctx.SelfID, ctx.SelfID, request.get())); - request.release(); - } else { - LOG_INFO(ctx, TBlockStoreComponents::DISK_REGISTRY, - "[%lu] Sending secure erase request", - TabletID()); + auto it = dirtyDevices.begin(); + while (it != dirtyDevices.end()) { + auto first = it; + const auto poolName = first->GetPoolName(); + it = std::partition( + first, + dirtyDevices.end(), + [&poolName](const auto& device) + { return poolName == device.GetPoolName(); }); + + auto [_, alreadyInProgress] = + SecureEraseInProgressPerPool.insert(poolName); + if (!alreadyInProgress) { + continue; + } - NCloud::Send(ctx, ctx.SelfID, std::move(request)); + auto request = + std::make_unique( + poolName, + TVector( + std::make_move_iterator(first), + std::make_move_iterator(it)), + Config->GetNonReplicatedSecureEraseTimeout()); + + auto deadline = + Min(SecureEraseStartTs, ctx.Now()) + TDuration::Seconds(5); + if (deadline > ctx.Now()) { + LOG_INFO( + ctx, + TBlockStoreComponents::DISK_REGISTRY, + "[%lu] Scheduled secure erase for pool: %s, now: %lu, " + "deadline: %lu", + TabletID(), + poolName.c_str(), + ctx.Now().MicroSeconds(), + deadline.MicroSeconds()); + + ctx.ExecutorThread.Schedule( + deadline, + new IEventHandle(ctx.SelfID, ctx.SelfID, request.release())); + } else { + LOG_INFO( + ctx, + TBlockStoreComponents::DISK_REGISTRY, + "[%lu] Sending secure erase request", + TabletID()); + + NCloud::Send(ctx, ctx.SelfID, std::move(request)); + } } } @@ -425,6 +451,7 @@ void TDiskRegistryActor::HandleSecureErase( msg->CallContext ), msg->RequestTimeout, + msg->PoolName, std::move(msg->DirtyDevices)); Actors.insert(actor); } @@ -440,7 +467,7 @@ void TDiskRegistryActor::HandleSecureEraseResponse( TabletID(), msg->CleanDevices); - SecureEraseInProgress = false; + SecureEraseInProgressPerPool.erase(msg->PoolName); SecureErase(ctx); } diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_writable_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_writable_state.cpp index df481c96364..bbe30e89bf1 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_writable_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_writable_state.cpp @@ -96,7 +96,7 @@ void TDiskRegistryActor::CompleteWritableState( DisksNotificationInProgress = false; UsersNotificationInProgress = false; DiskStatesPublicationInProgress = false; - SecureEraseInProgress = false; + SecureEraseInProgressPerPool.clear(); StartMigrationInProgress = false; } diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_private.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_private.h index 47bcc1dfe2f..366acfdddfd 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_private.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_private.h @@ -267,25 +267,32 @@ struct TEvDiskRegistryPrivate struct TSecureEraseRequest { + TString PoolName; TVector DirtyDevices; TDuration RequestTimeout; - explicit TSecureEraseRequest( + TSecureEraseRequest( + TString poolName, TVector dirtyDevices, TDuration requestTimeout) - : DirtyDevices(std::move(dirtyDevices)) + : PoolName(std::move(poolName)) + , DirtyDevices(std::move(dirtyDevices)) , RequestTimeout(requestTimeout) {} }; struct TSecureEraseResponse { + TString PoolName; size_t CleanDevices = 0; TSecureEraseResponse() = default; - explicit TSecureEraseResponse(size_t cleanDevices) - : CleanDevices(cleanDevices) + TSecureEraseResponse( + TString poolName, + size_t cleanDevices) + : PoolName(std::move(poolName)) + , CleanDevices(cleanDevices) {} }; diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_ut_lifecycle.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_ut_lifecycle.cpp index c553d8f0bac..ebcc1633b1a 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_ut_lifecycle.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_ut_lifecycle.cpp @@ -2219,6 +2219,263 @@ Y_UNIT_TEST_SUITE(TDiskRegistryTest) UNIT_ASSERT_EQUAL(E_NOT_FOUND, response->Record.GetError().GetCode()); } } + + Y_UNIT_TEST(ShouldSecureEraseDevicesFromDifferentPools) + { + // 1 .Create devices from two different pools with pool kind + // DEVICE_POOL_KIND_LOCAL. Devices are created in the suspended state. + // 2. Set observer for EvCleanupDevicesRequest which check that + // all devices from one message belong to the same pool. + // 3. Call ResumeDevice for all devices. Internally ResumeDevice + // triggers SecureErase. + + const TString poolName1 = "pool-1"; + const auto withPool1 = + WithPool(poolName1, NProto::DEVICE_POOL_KIND_GLOBAL); + auto agent1 = CreateAgentConfig( + "agent-1", + {Device("dev-1", "uuid-1", "rack-1", 10_GB) | withPool1, + Device("dev-2", "uuid-2", "rack-1", 10_GB) | withPool1, + Device("dev-3", "uuid-3", "rack-1", 10_GB) | withPool1}); + + const TString poolName2 = "pool-2"; + const auto withPool2 = + WithPool(poolName2, NProto::DEVICE_POOL_KIND_GLOBAL); + auto agent2 = CreateAgentConfig( + "agent-2", + {Device("dev-4", "uuid-4", "rack-1", 10_GB) | withPool2, + Device("dev-5", "uuid-5", "rack-1", 10_GB) | withPool2, + Device("dev-6", "uuid-6", "rack-1", 10_GB) | withPool2}); + + auto runtime = + TTestRuntimeBuilder().WithAgents({agent1, agent2}).Build(); + + TDiskRegistryClient diskRegistry(*runtime); + diskRegistry.WaitReady(); + diskRegistry.SetWritableState(true); + + diskRegistry.UpdateConfig( + [&] + { + auto config = CreateRegistryConfig(0, {agent1, agent2}); + + auto* pool1 = config.AddDevicePoolConfigs(); + pool1->SetName(poolName1); + pool1->SetKind(NProto::DEVICE_POOL_KIND_LOCAL); + pool1->SetAllocationUnit(10_GB); + + auto* pool2 = config.AddDevicePoolConfigs(); + pool2->SetName(poolName2); + pool2->SetKind(NProto::DEVICE_POOL_KIND_LOCAL); + pool2->SetAllocationUnit(10_GB); + + return config; + }()); + + RegisterAgents(*runtime, 2); + WaitForAgents(*runtime, 2); + + THashMap deviceUuidToPoolName = { + {"uuid-1", poolName1}, + {"uuid-2", poolName1}, + {"uuid-3", poolName1}, + {"uuid-4", poolName2}, + {"uuid-5", poolName2}, + {"uuid-6", poolName2}, + }; + + runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvDiskRegistryPrivate::EvCleanupDevicesRequest: { + auto& msg = *event->Get< + TEvDiskRegistryPrivate::TEvCleanupDevicesRequest>(); + TSet poolNameSet; + for (auto& device: msg.Devices) { + poolNameSet.insert(deviceUuidToPoolName[device]); + } + // each SecureErase actor handles devices only from one + // pool + UNIT_ASSERT_EQUAL(1, poolNameSet.size()); + break; + } + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + diskRegistry.ResumeDevice("agent-1", "dev-1", /*dryRun=*/false); + diskRegistry.ResumeDevice("agent-2", "dev-4", /*dryRun=*/false); + diskRegistry.ResumeDevice("agent-1", "dev-2", /*dryRun=*/false); + diskRegistry.ResumeDevice("agent-2", "dev-5", /*dryRun=*/false); + diskRegistry.ResumeDevice("agent-1", "dev-3", /*dryRun=*/false); + diskRegistry.ResumeDevice("agent-2", "dev-6", /*dryRun=*/false); + WaitForSecureErase(*runtime, 6); + } + + Y_UNIT_TEST(ShouldSecureEraseDevicesFromDifferentPoolsIndependently) + { + const TString defaultPool = ""; + const TString rotPool = "rot"; + const auto withRotPool = + WithPool(rotPool, NProto::DEVICE_POOL_KIND_GLOBAL); + + TVector agents{ + CreateAgentConfig( + "agent-1", + {Device("path", "uuid-1", "rack-1", 10_GB) | withRotPool, + Device("path", "uuid-2", "rack-1", 10_GB) | withRotPool, + Device("path", "uuid-3", "rack-1", 10_GB) | withRotPool}), + CreateAgentConfig( + "agent-2", + {Device("path", "uuid-4", "rack-1", 10_GB), + Device("path", "uuid-5", "rack-1", 10_GB), + Device("path", "uuid-6", "rack-1", 10_GB)})}; + + THashMap deviceUuidToPoolName = { + {"uuid-1", rotPool}, + {"uuid-2", rotPool}, + {"uuid-3", rotPool}, + {"uuid-4", defaultPool}, + {"uuid-5", defaultPool}, + {"uuid-6", defaultPool}, + }; + + auto runtime = + TTestRuntimeBuilder() + .With( + [] + { + auto config = CreateDefaultStorageConfig(); + + config + .SetMaxDevicesToErasePerDeviceNameForDefaultPoolKind( + 1); + config + .SetMaxDevicesToErasePerDeviceNameForGlobalPoolKind( + 1); + + return config; + }()) + .WithAgents(agents) + .Build(); + + TDiskRegistryClient diskRegistry(*runtime); + diskRegistry.WaitReady(); + diskRegistry.SetWritableState(true); + + diskRegistry.UpdateConfig( + [&] + { + auto config = CreateRegistryConfig(0, agents); + + auto* rot = config.AddDevicePoolConfigs(); + rot->SetName(rotPool); + rot->SetKind(NProto::DEVICE_POOL_KIND_GLOBAL); + rot->SetAllocationUnit(10_GB); + + return config; + }()); + + TVector> secureEraseRequests; + + runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvDiskAgent::EvSecureEraseDeviceRequest: { + secureEraseRequests.emplace_back(std::move(event)); + return TTestActorRuntime::EEventAction::DROP; + } + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + RegisterAgents(*runtime, 2); + WaitForAgents(*runtime, 2); + + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest, 2}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(2, secureEraseRequests.size()); + + auto getPoolName = [&](const auto& event) + { + const auto& msg = *event->template Get< + TEvDiskAgent::TEvSecureEraseDeviceRequest>(); + + return deviceUuidToPoolName[msg.Record.GetDeviceUUID()]; + }; + + SortBy(secureEraseRequests, getPoolName); + + UNIT_ASSERT_VALUES_EQUAL( + defaultPool, + getPoolName(secureEraseRequests[0])); + UNIT_ASSERT_VALUES_EQUAL(rotPool, getPoolName(secureEraseRequests[1])); + + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(2, secureEraseRequests.size()); + + auto sendResponse = [&](auto event) + { + runtime->Send(new IEventHandle( + event->Sender, + event->Recipient, + new TEvDiskAgent::TEvSecureEraseDeviceResponse(), + 0, // flags + event->Cookie)); + }; + + // send the first response for the device from the default pool + sendResponse(std::move(secureEraseRequests[0])); + + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(3, secureEraseRequests.size()); + UNIT_ASSERT_VALUES_EQUAL( + defaultPool, + getPoolName(secureEraseRequests[2])); + + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(3, secureEraseRequests.size()); + + // send the second response for the device from the default pool + sendResponse(std::move(secureEraseRequests[2])); + + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(4, secureEraseRequests.size()); + UNIT_ASSERT_VALUES_EQUAL( + defaultPool, + getPoolName(secureEraseRequests[3])); + + // send the third response for the device from the default pool + sendResponse(std::move(secureEraseRequests[3])); + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + + // there are no new requests - all devices from the default pool are + // clean + UNIT_ASSERT_VALUES_EQUAL(4, secureEraseRequests.size()); + + // send the response for the device from the rot pool + sendResponse(std::move(secureEraseRequests[1])); + runtime->DispatchEvents( + {.FinalEvents = {{TEvDiskAgent::EvSecureEraseDeviceRequest}}}, + 15s); + UNIT_ASSERT_VALUES_EQUAL(5, secureEraseRequests.size()); + UNIT_ASSERT_VALUES_EQUAL(rotPool, getPoolName(secureEraseRequests[4])); + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_registry/testlib/test_env.h b/cloud/blockstore/libs/storage/disk_registry/testlib/test_env.h index 0174a9e4a12..089130b5094 100644 --- a/cloud/blockstore/libs/storage/disk_registry/testlib/test_env.h +++ b/cloud/blockstore/libs/storage/disk_registry/testlib/test_env.h @@ -744,6 +744,7 @@ class TDiskRegistryClient TDuration requestTimeout = {}) { return std::make_unique( + TString{}, std::move(devices), requestTimeout); }