Skip to content

Commit

Permalink
add checksum mismatch sensor per volume (#1612) (#1616)
Browse files Browse the repository at this point in the history
  • Loading branch information
WilyTiger committed Jul 16, 2024
1 parent 40030ab commit 97bc0cf
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 0 deletions.
5 changes: 5 additions & 0 deletions cloud/blockstore/libs/storage/core/disk_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ struct TSimpleDiskCounters
EPublishingPolicy::DiskRegistryBased,
TSimpleCounter::ECounterType::Generic,
ECounterExpirationPolicy::Permanent};
// Checksum mismatch count exposed as a per-volume sensor. The mirror
// partition actor writes its running mismatch total into this counter
// when it sends stats.
TCounter ChecksumMismatches{
EPublishingPolicy::DiskRegistryBased,
TSimpleCounter::ECounterType::Generic,
ECounterExpirationPolicy::Permanent};

static constexpr TMeta AllCounters[] = {
MakeMeta<&TSimpleDiskCounters::BytesCount>(),
Expand Down Expand Up @@ -233,6 +237,7 @@ struct TSimpleDiskCounters
MakeMeta<&TSimpleDiskCounters::HasBrokenDevice>(),
MakeMeta<&TSimpleDiskCounters::HasBrokenDeviceSilent>(),
MakeMeta<&TSimpleDiskCounters::ScrubbingProgress>(),
MakeMeta<&TSimpleDiskCounters::ChecksumMismatches>(),
};
};
static_assert(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ void TMirrorPartitionActor::CompareChecksums(const TActorContext& ctx)
DescribeRange(GetScrubbingRange()).c_str(),
checksums[i]);
}
++ChecksumMismatches;
ReportMirroredDiskChecksumMismatch();

if (Config->GetResyncRangeAfterScrubbing()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class TMirrorPartitionActor final
ui64 ScrubbingThroughput = 0;
TInstant ScrubbingRangeStarted;
bool ResyncRangeStarted = false;
// Running total of checksum mismatches: incremented in CompareChecksums()
// and copied into the Simple.ChecksumMismatches counter by SendStats().
ui32 ChecksumMismatches = 0;

public:
TMirrorPartitionActor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx)
}
}

stats->Simple.ChecksumMismatches.Value = ChecksumMismatches;
stats->Simple.ScrubbingProgress.Value =
100 * GetScrubbingRange().Start / State.GetBlockCount();
stats->Cumulative.ScrubbingThroughput.Value = ScrubbingThroughput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,7 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest)
2 * 4_MB,
counters.Cumulative.ScrubbingThroughput.Value);
UNIT_ASSERT_VALUES_EQUAL(33, counters.Simple.ScrubbingProgress.Value);
UNIT_ASSERT_VALUES_EQUAL(0, counters.Simple.ChecksumMismatches.Value);
}

Y_UNIT_TEST(ShouldFindChecksumMismatch)
Expand Down Expand Up @@ -1234,6 +1235,7 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest)
true);

UNIT_ASSERT_VALUES_EQUAL(2, mirroredDiskChecksumMismatch->Val());
UNIT_ASSERT_VALUES_EQUAL(2, counters.Simple.ChecksumMismatches.Value);

const auto range3 = TBlockRange64::WithLength(1025, 50);
env.WriteMirror(range3, 'A');
Expand All @@ -1250,6 +1252,7 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest)
runtime.AdvanceCurrentTime(UpdateCountersInterval);
runtime.DispatchEvents({}, TDuration::MilliSeconds(50));
}
UNIT_ASSERT_VALUES_EQUAL(3, counters.Simple.ChecksumMismatches.Value);
UNIT_ASSERT_VALUES_EQUAL(3, mirroredDiskChecksumMismatch->Val());

// check that all ranges were resynced and there are no more mismatches
Expand All @@ -1264,6 +1267,7 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest)
runtime.AdvanceCurrentTime(UpdateCountersInterval);
runtime.DispatchEvents({}, TDuration::MilliSeconds(50));
}
UNIT_ASSERT_VALUES_EQUAL(3, counters.Simple.ChecksumMismatches.Value);
UNIT_ASSERT_VALUES_EQUAL(3, mirroredDiskChecksumMismatch->Val());
}

Expand Down

0 comments on commit 97bc0cf

Please sign in to comment.