diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index 056a0c66039..72af5bf0901 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -1149,11 +1149,50 @@ NProto::TError TDiskRegistryState::ReplaceDevice( bool manual, bool* diskStateUpdated) { - try { - if (!diskId) { - return MakeError(E_ARGUMENT, "empty disk id"); - } + Y_ABORT_UNLESS(diskStateUpdated); + *diskStateUpdated = false; + + if (!diskId) { + return MakeError(E_ARGUMENT, "empty disk id"); + } + + if (!Disks.contains(diskId)) { + return MakeError(E_ARGUMENT, TStringBuilder() + << "unknown disk: " << diskId.Quote()); + } + + TDiskState& disk = Disks[diskId]; + auto error = ReplaceDeviceWithoutDiskStateUpdate( + db, + disk, + diskId, + deviceId, + deviceReplacementId, + timestamp, + std::move(message), + manual); + + if (HasError(error)) { + return error; + } + + *diskStateUpdated = TryUpdateDiskState(db, diskId, disk, timestamp); + + return {}; +} + +NProto::TError TDiskRegistryState::ReplaceDeviceWithoutDiskStateUpdate( + TDiskRegistryDatabase& db, + TDiskState& disk, + const TString& diskId, + const TString& deviceId, + const TString& deviceReplacementId, + TInstant timestamp, + TString message, + bool manual) +{ + try { if (!deviceId) { return MakeError(E_ARGUMENT, "empty device id"); } @@ -1163,13 +1202,6 @@ NProto::TError TDiskRegistryState::ReplaceDevice( << "device does not belong to disk " << diskId.Quote()); } - if (!Disks.contains(diskId)) { - return MakeError(E_ARGUMENT, TStringBuilder() - << "unknown disk: " << diskId.Quote()); - } - - TDiskState& disk = Disks[diskId]; - auto it = Find(disk.Devices, deviceId); if (it == disk.Devices.end()) { auto message = ReportDiskRegistryDeviceNotFound( @@ -1208,7 +1240,6 @@ NProto::TError TDiskRegistryState::ReplaceDevice( timestamp, message); if (HasError(error)) { - TryUpdateDiskState(db, diskId, timestamp); return error; } @@ -1282,8 +1313,6 @@ NProto::TError TDiskRegistryState::ReplaceDevice( *it = targetDevice.GetDeviceUUID(); - *diskStateUpdated = TryUpdateDiskState(db, diskId, disk, timestamp); - UpdateAgent(db, *agentPtr); UpdatePlacementGroup(db, diskId, disk, "ReplaceDevice"); @@ -4789,13 +4818,13 @@ void TDiskRegistryState::ApplyAgentStateChange( auto& disk = Disks[diskId]; + diskIds.emplace(diskId); + // check if deviceId is target for migration if (RestartDeviceMigration(timestamp, db, diskId, disk, deviceId)) { continue; } - bool isAffected = true; - if (agent.GetState() == NProto::AGENT_STATE_WARNING) { if (disk.MigrationSource2Target.contains(deviceId)) { // migration already started @@ -4828,10 +4857,9 @@ void TDiskRegistryState::ApplyAgentStateChange( deviceId); if (canReplaceDevice) { - bool updated = false; - - auto error = ReplaceDevice( + auto error = ReplaceDeviceWithoutDiskStateUpdate( db, + disk, diskId, deviceId, "", // no replacement device @@ -4839,31 +4867,22 @@ void TDiskRegistryState::ApplyAgentStateChange( MakeMirroredDiskDeviceReplacementMessage( disk.MasterDiskId, "agent unavailable"), - false, // manual - &updated); + false); // manual if (HasError(error)) { ReportMirroredDiskDeviceReplacementFailure( FormatError(error)); } - - if (!updated) { - isAffected = false; - } } } CancelDeviceMigration(timestamp, db, diskId, disk, deviceId); } - - if (isAffected) { - diskIds.emplace(std::move(diskId)); - } } - for (auto& id: diskIds) { - if (TryUpdateDiskState(db, id, timestamp)) { - affectedDisks.push_back(std::move(id)); + for (const auto& diskId: diskIds) { + if (TryUpdateDiskState(db, diskId, timestamp)) { + affectedDisks.push_back(diskId); } } } @@ -5759,39 +5778,29 @@ void TDiskRegistryState::ApplyDeviceStateChange( return; } - if (device.GetState() == NProto::DEVICE_STATE_ERROR - && disk->MasterDiskId) - { + if (device.GetState() == NProto::DEVICE_STATE_ERROR && disk->MasterDiskId) { const bool canReplaceDevice = CheckIfDeviceReplacementIsAllowed( now, disk->MasterDiskId, device.GetDeviceUUID()); if (canReplaceDevice) { - bool updated = false; - auto error = ReplaceDevice( + auto error = ReplaceDeviceWithoutDiskStateUpdate( db, + *disk, diskId, device.GetDeviceUUID(), - "", // no replacement device + "", // no replacement device now, MakeMirroredDiskDeviceReplacementMessage( disk->MasterDiskId, "device failure"), - false, // manual - &updated); + false); // manual if (HasError(error)) { - ReportMirroredDiskDeviceReplacementFailure( - FormatError(error)); - } - - if (updated) { - affectedDisk = diskId; + ReportMirroredDiskDeviceReplacementFailure(FormatError(error)); } } - - return; } if (TryUpdateDiskState(db, diskId, *disk, now)) { diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h index bdb81603255..176b653877f 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h @@ -1264,6 +1264,16 @@ class TDiskRegistryState const NProto::TDiskRegistryConfig& newConfig) const; std::optional GetDiskBlockCount(const TDiskId& diskId) const; + + NProto::TError ReplaceDeviceWithoutDiskStateUpdate( + TDiskRegistryDatabase& db, + TDiskState& disk, + const TString& diskId, + const TString& deviceId, + const TString& deviceReplacementId, + TInstant timestamp, + TString message, + bool manual); }; } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_mirrored_disks.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_mirrored_disks.cpp index 3c44fdf2224..4187455ec37 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_mirrored_disks.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_mirrored_disks.cpp @@ -1806,6 +1806,129 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) rt); } + Y_UNIT_TEST(ShouldReturnAffectedDiskListOnAgentFailure) + { + TTestExecutor executor; + executor.WriteTx([&] (TDiskRegistryDatabase db) { + db.InitSchema(); + }); + + auto agentConfig1 = AgentConfig(1, { + Device("dev-1", "uuid-1", "rack-1"), + Device("dev-2", "uuid-2", "rack-1"), + }); + + auto agentConfig2 = AgentConfig(2, { + Device("dev-1", "uuid-3", "rack-2"), + Device("dev-2", "uuid-4", "rack-2"), + }); + + auto agentConfig3 = AgentConfig(3, { + Device("dev-1", "uuid-5", "rack-3"), + }); + + TDiskRegistryState state = TDiskRegistryStateBuilder() + .WithKnownAgents({ + agentConfig1, + agentConfig2, + agentConfig3 + }) + .Build(); + + // Create a mirror-2 disk + executor.WriteTx([&] (TDiskRegistryDatabase db) { + TVector devices; + TVector> replicas; + TVector migrations; + TVector deviceReplacementIds; + auto error = AllocateMirroredDisk( + db, + state, + "disk-1", + 20_GB, + 1, + devices, + replicas, + migrations, + deviceReplacementIds); + UNIT_ASSERT_SUCCESS(error); + UNIT_ASSERT_VALUES_EQUAL(2, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2", devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas.size()); + UNIT_ASSERT_VALUES_EQUAL(2, replicas[0].size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-3", replicas[0][0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-4", replicas[0][1].GetDeviceUUID()); + ASSERT_VECTORS_EQUAL(TVector{}, deviceReplacementIds); + }); + + // All replicas must be online + + { + TDiskInfo replicaInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", replicaInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name(NProto::DISK_STATE_ONLINE), + NProto::EDiskState_Name(replicaInfo.State)); + } + + { + TDiskInfo replicaInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/1", replicaInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name(NProto::DISK_STATE_ONLINE), + NProto::EDiskState_Name(replicaInfo.State)); + } + + // Breaking the first agent + + executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { + TVector affectedDisks; + TDuration timeout; + auto error = state.UpdateAgentState( + db, + agentConfig1.GetAgentId(), + NProto::AGENT_STATE_UNAVAILABLE, + Now(), + "unreachable", + affectedDisks); + + // Now first replica (disk-1/0) should be broken + + TDiskInfo replicaInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", replicaInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name(NProto::DISK_STATE_TEMPORARILY_UNAVAILABLE), + NProto::EDiskState_Name(replicaInfo.State)); + + // And second one (disk-1/1) should be OK + + replicaInfo = {}; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/1", replicaInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name(NProto::DISK_STATE_ONLINE), + NProto::EDiskState_Name(replicaInfo.State)); + + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + UNIT_ASSERT_VALUES_EQUAL(TDuration::Zero(), timeout); + + // The state of 'disk-1/0' has been changed so 'disk-1/0' should be + // in 'affectedDisks' + ASSERT_VECTORS_EQUAL(TVector{"disk-1/0"}, affectedDisks); + + auto replaced = state.GetAutomaticallyReplacedDevices(); + UNIT_ASSERT_VALUES_EQUAL(1, replaced.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1", replaced[0].DeviceId); + + UNIT_ASSERT_SUCCESS(state.MarkReplacementDevice( + Now(), + db, + "disk-1", + "uuid-5", + false)); + }); + } + Y_UNIT_TEST(ShouldProperlyCleanupAutomaticallyReplacedDevices) { TTestExecutor executor; @@ -1979,14 +2102,14 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) diskInfo = {}; error = state.GetDiskInfo("disk-1/0", diskInfo); UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); - // 2 x state change + 3 x replacement - UNIT_ASSERT_VALUES_EQUAL(5, diskInfo.History.size()); + // 3 x replacement + UNIT_ASSERT_VALUES_EQUAL(3, diskInfo.History.size()); diskInfo = {}; error = state.GetDiskInfo("disk-1/1", diskInfo); UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); - // 2 x state change + 3 x replacement - UNIT_ASSERT_VALUES_EQUAL(5, diskInfo.History.size()); + // 3 x replacement + UNIT_ASSERT_VALUES_EQUAL(3, diskInfo.History.size()); auto dirtyDevices = state.GetDirtyDevices(); UNIT_ASSERT_VALUES_EQUAL(6, dirtyDevices.size()); @@ -2400,6 +2523,13 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) const auto changeStateTs = Now(); executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name( + NProto::DISK_STATE_ONLINE), + NProto::EDiskState_Name(diskInfo.State)); + TVector affectedDisks; TDuration timeout; auto error = state.UpdateAgentState( @@ -2412,7 +2542,14 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); UNIT_ASSERT_VALUES_EQUAL(TDuration::Zero(), timeout); - UNIT_ASSERT_VALUES_EQUAL(0, affectedDisks.size()); + ASSERT_VECTORS_EQUAL(TVector{"disk-1/0"}, affectedDisks); + + diskInfo = {}; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name( + NProto::DISK_STATE_TEMPORARILY_UNAVAILABLE), + NProto::EDiskState_Name(diskInfo.State)); }); UNIT_ASSERT_VALUES_EQUAL(1, criticalEvents->Val()); @@ -2482,6 +2619,116 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) }); } + Y_UNIT_TEST(ShouldReportDeviceReplacementFailureUponDeviceFailure) + { + TTestExecutor executor; + executor.WriteTx([&] (TDiskRegistryDatabase db) { + db.InitSchema(); + }); + + auto agentConfig1 = AgentConfig(1, { + Device("dev-1", "uuid-1", "rack-1"), + }); + + auto agentConfig2 = AgentConfig(2, { + Device("dev-1", "uuid-2", "rack-2"), + }); + + auto agentConfig3 = AgentConfig(3, { + Device("dev-1", "uuid-3", "rack-3"), + }); + + TDiskRegistryState state = TDiskRegistryStateBuilder() + .WithKnownAgents({ + agentConfig1, + agentConfig2, + agentConfig3, + }) + .Build(); + + // Creating a mirror-3 (one device per replica) + + executor.WriteTx([&] (TDiskRegistryDatabase db) { + TVector devices; + TVector> replicas; + TVector migrations; + TVector deviceReplacementIds; + auto error = AllocateMirroredDisk( + db, + state, + "disk-1", + 10_GB, + 2, + devices, + replicas, + migrations, + deviceReplacementIds); + UNIT_ASSERT_SUCCESS(error); + UNIT_ASSERT_VALUES_EQUAL(1, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL(2, replicas.size()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas[0].size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2", replicas[0][0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas[1].size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-3", replicas[1][0].GetDeviceUUID()); + ASSERT_VECTORS_EQUAL(TVector{}, deviceReplacementIds); + }); + + auto monitoring = CreateMonitoringServiceStub(); + auto rootGroup = monitoring->GetCounters() + ->GetSubgroup("counters", "blockstore"); + + auto serverGroup = rootGroup->GetSubgroup("component", "server"); + InitCriticalEventsCounter(serverGroup); + + auto criticalEvents = serverGroup->FindCounter( + "AppCriticalEvents/MirroredDiskDeviceReplacementFailure"); + + UNIT_ASSERT_VALUES_EQUAL(0, criticalEvents->Val()); + + executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { + + // A first replica should be OK + + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name( + NProto::DISK_STATE_ONLINE), + NProto::EDiskState_Name(diskInfo.State)); + + // Breakin a device + + TString affectedDisk; + TDuration timeout; + auto error = state.UpdateDeviceState( + db, + "uuid-1", + NProto::DEVICE_STATE_ERROR, + Now(), + "test", + affectedDisk); + + // Now the first replca is not OK, its state has been changed to + // ERROR so we should see disk-1/0 in affectedDisk + + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + UNIT_ASSERT_VALUES_EQUAL(TDuration::Zero(), timeout); + UNIT_ASSERT_VALUES_EQUAL("disk-1/0", affectedDisk); + + // Ensuring that disk-1/0 is broken + + diskInfo = {}; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL( + NProto::EDiskState_Name( + NProto::DISK_STATE_ERROR), + NProto::EDiskState_Name(diskInfo.State)); + }); + + UNIT_ASSERT_VALUES_EQUAL(1, criticalEvents->Val()); + } + Y_UNIT_TEST(ShouldStopAutomaticReplacementIfReplacementRateIsTooHigh) { TTestExecutor executor; @@ -2655,6 +2902,137 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMirroredDisksTest) } ASSERT_VECTORS_EQUAL(expectedReplacedDeviceIds, replacedDeviceIds); } + + Y_UNIT_TEST(ShouldReportRateLimitExceededCritEventUponDeviceFailure) + { + TTestExecutor executor; + executor.WriteTx([&] (TDiskRegistryDatabase db) { + db.InitSchema(); + }); + + auto agentConfig1 = AgentConfig(1, { + Device("dev-1", "uuid-1", "rack-1"), + Device("dev-2", "uuid-2", "rack-1"), + Device("dev-3", "uuid-3", "rack-1"), + }); + + auto agentConfig2 = AgentConfig(2, { + Device("dev-1", "uuid-4", "rack-2"), + Device("dev-2", "uuid-5", "rack-2"), + Device("dev-3", "uuid-6", "rack-2"), + }); + + NProto::TStorageServiceConfig storageConfig; + storageConfig.SetAllocationUnitNonReplicatedSSD(10); + storageConfig.SetMaxAutomaticDeviceReplacementsPerHour(1); + + TDiskRegistryState state = TDiskRegistryStateBuilder() + .With(CreateStorageConfig(storageConfig)) + .WithKnownAgents({ + agentConfig1, + agentConfig2, + }) + .Build(); + + auto monitoring = CreateMonitoringServiceStub(); + auto rootGroup = monitoring->GetCounters() + ->GetSubgroup("counters", "blockstore"); + + auto serverGroup = rootGroup->GetSubgroup("component", "server"); + InitCriticalEventsCounter(serverGroup); + + auto criticalEvents = serverGroup->FindCounter( + "AppCriticalEvents/MirroredDiskDeviceReplacementRateLimitExceeded"); + + UNIT_ASSERT_VALUES_EQUAL(0, criticalEvents->Val()); + + executor.WriteTx([&] (TDiskRegistryDatabase db) { + TVector devices; + TVector> replicas; + TVector migrations; + TVector deviceReplacementIds; + auto error = AllocateMirroredDisk( + db, + state, + "disk-1", + 20_GB, + 1, + devices, + replicas, + migrations, + deviceReplacementIds); + UNIT_ASSERT_SUCCESS(error); + UNIT_ASSERT_VALUES_EQUAL(2, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2", devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas.size()); + UNIT_ASSERT_VALUES_EQUAL(2, replicas[0].size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-4", replicas[0][0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-5", replicas[0][1].GetDeviceUUID()); + ASSERT_VECTORS_EQUAL(TVector{}, deviceReplacementIds); + }); + + executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { + TString affectedDisk; + auto error = state.UpdateDeviceState( + db, + "uuid-1", + NProto::DEVICE_STATE_ERROR, + Now(), + "test", // reason + affectedDisk); + + UNIT_ASSERT_VALUES_EQUAL_C(S_OK, error.GetCode(), error); + UNIT_ASSERT_VALUES_EQUAL("", affectedDisk); + }); + + executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { + TString affectedDisk; + auto error = state.UpdateDeviceState( + db, + "uuid-2", + NProto::DEVICE_STATE_ERROR, + Now(), + "test", // reason + affectedDisk); + + UNIT_ASSERT_VALUES_EQUAL_C(S_OK, error.GetCode(), error); + UNIT_ASSERT_VALUES_EQUAL("disk-1/0", affectedDisk); + }); + + TDiskInfo info; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/0", info)); + + UNIT_ASSERT_VALUES_UNEQUAL("uuid-1", info.Devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2", info.Devices[1].GetDeviceUUID()); + + UNIT_ASSERT_EQUAL( + NProto::DEVICE_STATE_ONLINE, + info.Devices[0].GetState()); + + UNIT_ASSERT_EQUAL( + NProto::DEVICE_STATE_ERROR, + info.Devices[1].GetState()); + + UNIT_ASSERT_EQUAL(NProto::DISK_STATE_ERROR, info.State); + + info = {}; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1/1", info)); + UNIT_ASSERT_VALUES_EQUAL("uuid-4", info.Devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-5", info.Devices[1].GetDeviceUUID()); + + UNIT_ASSERT_EQUAL( + NProto::DEVICE_STATE_ONLINE, + info.Devices[0].GetState()); + + UNIT_ASSERT_EQUAL( + NProto::DEVICE_STATE_ONLINE, + info.Devices[1].GetState()); + + UNIT_ASSERT_EQUAL(NProto::DISK_STATE_ONLINE, info.State); + + UNIT_ASSERT_VALUES_EQUAL(1, criticalEvents->Val()); + } } bool operator==(const TDiskStateUpdate& l, const TDiskStateUpdate& r)