diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp index cf040c1042c..7ac14888610 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace NCloud::NBlockStore::NStorage { @@ -161,6 +162,45 @@ void DumpDeviceLink(IOutputStream& out, ui64 tabletId, TStringBuf uuid) << ""; } +void DumpSquare(IOutputStream& out, const TStringBuf& color) +{ + const char* utfBlackSquare = "■"; + const char* nonBreakingSpace = " "; + out << "" + << utfBlackSquare + << nonBreakingSpace + << ""; +} + +using DiskInfoArray = ::google::protobuf::RepeatedPtrField; +auto GetSortedDisksView( + const DiskInfoArray& disks, + const THashSet& brokenDisks) +{ + using TDiskIterator = decltype(disks.begin()); + + TVector diskIndices(disks.size()); + std::iota(diskIndices.begin(), diskIndices.end(), disks.begin()); + + auto sortByFailureThenByPartition = + [&](const TDiskIterator& d1, const TDiskIterator& d2) + { + auto makeComparableTuple = [&brokenDisks](const TDiskIterator& d){ + return std::make_tuple( + !brokenDisks.contains(d->GetDiskId()), + d->GetPlacementPartitionIndex(), + d->GetDiskId() + ); + }; + return makeComparableTuple(d1) < makeComparableTuple(d2); + }; + + std::sort(diskIndices.begin(), diskIndices.end(), sortByFailureThenByPartition); + return diskIndices; +} + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -1440,65 +1480,126 @@ void TDiskRegistryActor::RenderPlacementGroupList( TABLEHEAD() { TABLER() { TABLEH() { out << "GroupId"; } + TABLEH() { out << "Strategy"; } + TABLEH() { out << "State"; } TABLEH() { out << "Stats"; } TABLEH() { out << "Disks"; } } } - for (const auto& x: State->GetPlacementGroups()) { + for (const auto& [groupId, groupInfo]: State->GetPlacementGroups()) { TABLER() { - TABLED() { - auto it = brokenGroups.find(x.first); + const auto strategy = groupInfo.Config.GetPlacementStrategy(); + const bool isPartitionGroup = + strategy == NProto::PLACEMENT_STRATEGY_PARTITION; - if (it != brokenGroups.end()) { - if (it->second.Recently.GetBrokenPartitionCount() == 1) { - out << "■ "; - } + auto brokenGroupInfo = brokenGroups.FindPtr(groupId); + const size_t brokenPartitionsCount = brokenGroupInfo + ? brokenGroupInfo->Recently.GetBrokenPartitionCount() + : 0; - if (it->second.Recently.GetBrokenPartitionCount() > 1) { - out << "■ "; - } + TABLED() { + const auto* color = brokenPartitionsCount == 0 + ? "green" + : (brokenPartitionsCount == 1 ? "orange" : "red"); + DumpSquare(out, color); + out << groupId; + } + TABLED() { + TStringBuf name = EPlacementStrategy_Name(strategy); + name.AfterPrefix("PLACEMENT_STRATEGY_", name); + out << name; + } + TABLED() { + size_t totalPartitionsCount = isPartitionGroup + ? groupInfo.Config.GetPlacementPartitionCount() + : groupInfo.Config.GetDisks().size(); + + out << Sprintf("%s: Fine: %zu, ", + isPartitionGroup ? "Partitions" : "Disks", + totalPartitionsCount - brokenPartitionsCount); + + if (brokenPartitionsCount > 0) { + out << Sprintf("Broken: %zu, ", + brokenPartitionsCount == 1 ? "orange" : "red", + brokenPartitionsCount); } - out << x.first; + out << Sprintf("Total: %zu
", totalPartitionsCount); } TABLED() { - if (x.second.Full) { - out << "GROUP IS FULL, "; + if (groupInfo.Full) { + out << "GROUP IS FULL
"; } - out << "BiggestDisk: " << x.second.BiggestDiskId + out << "BiggestDisk: " << groupInfo.BiggestDiskId << " (" - << FormatByteSize(x.second.BiggestDiskSize) << ")"; + << FormatByteSize(groupInfo.BiggestDiskSize) << ")"; } TABLED() { TABLE_SORTABLE_CLASS("table table-bordered") { TABLER() { TABLED() { out << "ConfigVersion"; } - TABLED() { out << x.second.Config.GetConfigVersion(); } + TABLED() + { + out << groupInfo.Config.GetConfigVersion(); + } + } + TABLER() { + TABLED() { out << "Disk count"; } + TABLED() + { + out << groupInfo.Config.GetDisks().size(); + } } - TABLER() { - TABLED() { out << "Disks"; } - TABLED() { + TABLED_ATTRS({{"colspan", "2"}}) { TABLE_SORTABLE_CLASS("table table-bordered") { TABLEHEAD() { TABLER() { TABLEH() { out << "DiskId"; } TABLEH() { out << "Racks"; } + if (isPartitionGroup) { + TABLEH() + { + out << "Partition"; + } + } } } - for (const auto& d: x.second.Config.GetDisks()) { + + const auto brokenDisks = brokenGroupInfo + ? brokenGroupInfo->Recently.GetBrokenDisks() + : THashSet(); + + const auto sortedDisks = + GetSortedDisksView( + groupInfo.Config.GetDisks(), + brokenDisks); + + for (const auto& d: sortedDisks) { TABLER() { - TABLED() { DumpDiskLink(out, TabletID(), d.GetDiskId()); } TABLED() { - for (ui32 i = 0; i < d.DeviceRacksSize(); ++i) { - const auto& rack = d.GetDeviceRacks(i); - if (i) { - out << ", "; - } - - out << rack; + const bool isBroken = + brokenDisks.contains( + d->GetDiskId()); + DumpSquare( + out, + isBroken ? "red" + : "green"); + DumpDiskLink( + out, + TabletID(), + d->GetDiskId()); + } + TABLED() { + out << JoinSeq( + ", ", + d->GetDeviceRacks()); + } + if (isPartitionGroup) { + TABLED() { + out << d->GetPlacementPartitionIndex(); } } } diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index 8eb456dc935..1b6106d15d9 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -3642,9 +3642,9 @@ THashMap TDiskRegistryState::GatherBrokenGroupsInfo( auto res = groups.try_emplace(groupId, pg->Config.GetPlacementStrategy()); TBrokenGroupInfo& info = res.first->second; - info.Total.Increment(disk.PlacementPartitionIndex); + info.Total.Increment(diskId, disk.PlacementPartitionIndex); if (now - period < disk.StateTs) { - info.Recently.Increment(disk.PlacementPartitionIndex); + info.Recently.Increment(diskId, disk.PlacementPartitionIndex); } } diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h index 27ef117ffaa..b1edb95ac73 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h @@ -111,9 +111,9 @@ class TBrokenCounter : Strategy(strategy) {} - void Increment(ui32 partitionIndex) + void Increment(const TString& diskId, ui32 partitionIndex) { - ++BrokenDiskCount; + BrokenDisks.insert(diskId); if (Strategy == NProto::EPlacementStrategy::PLACEMENT_STRATEGY_PARTITION) { BrokenPartitions.insert(partitionIndex); } @@ -123,7 +123,7 @@ class TBrokenCounter { switch (Strategy) { case NProto::EPlacementStrategy::PLACEMENT_STRATEGY_SPREAD: - return BrokenDiskCount; + return BrokenDisks.size(); case NProto::EPlacementStrategy::PLACEMENT_STRATEGY_PARTITION: return BrokenPartitions.size(); default: @@ -135,9 +135,14 @@ class TBrokenCounter } } + [[nodiscard]] const THashSet& GetBrokenDisks() const + { + return BrokenDisks; + } + private: NProto::EPlacementStrategy Strategy; - ui32 BrokenDiskCount = 0; + THashSet BrokenDisks; THashSet BrokenPartitions; };