From 8588d44e45dd241e86b36d1b015644a7646dd7c5 Mon Sep 17 00:00:00 2001 From: yegorskii <137099343+yegorskii@users.noreply.github.com> Date: Sat, 3 Aug 2024 14:05:20 +0200 Subject: [PATCH] collect barrier gen should match current tablet generation (#1715) * collect barrier gen should match current tablet generation * update * fix failed uts because of shifted LastStep --------- Co-authored-by: yegorskii --- .../libs/storage/tablet/tablet_state.cpp | 4 +- .../libs/storage/tablet/tablet_ut_data.cpp | 81 ++++++++++++++++++- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.cpp b/cloud/filestore/libs/storage/tablet/tablet_state.cpp index d29ed36f1b6..ac39a29cc63 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state.cpp @@ -84,7 +84,9 @@ void TIndexTabletState::LoadState( const TThrottlerConfig& throttlerConfig) { Generation = generation; - LastStep = 0; + // https://github.com/ydb-platform/nbs/issues/1714 + // because of possible race in vdisks we should not start with 0 + LastStep = 1; LastCollectCounter = 0; TruncateBlocksThreshold = config.GetMaxBlocksPerTruncateTx(); diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp index c6a55cc4270..cd1ba6543b7 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp @@ -4032,7 +4032,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) NKikimr::LogoBlobIDFromLogoBlobID(blobPieces[0].GetBlobId()); UNIT_ASSERT_VALUES_EQUAL(tabletId, blobId0.TabletID()); UNIT_ASSERT_VALUES_EQUAL(2, blobId0.Generation()); - UNIT_ASSERT_VALUES_EQUAL(3, blobId0.Step()); + UNIT_ASSERT_VALUES_EQUAL(4, blobId0.Step()); UNIT_ASSERT_VALUES_EQUAL(3, blobId0.Channel()); UNIT_ASSERT_VALUES_EQUAL(0, blobId0.Cookie()); UNIT_ASSERT_VALUES_EQUAL(0, blobId0.PartId()); @@ -4066,7 +4066,7 
@@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) NKikimr::LogoBlobIDFromLogoBlobID(blobPieces[1].GetBlobId()); UNIT_ASSERT_VALUES_EQUAL(tabletId, blobId1.TabletID()); UNIT_ASSERT_VALUES_EQUAL(2, blobId1.Generation()); - UNIT_ASSERT_VALUES_EQUAL(10, blobId1.Step()); + UNIT_ASSERT_VALUES_EQUAL(11, blobId1.Step()); // 4_KB block leads to 4 blobs, 16_KB block leads to 2 blobs => this // blob becomes blob #4 => we start from channel #3, write our 1st blob // to it, next blob goes to channel #4, then #5 and the 4th blob goes @@ -4150,7 +4150,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) NKikimr::LogoBlobIDFromLogoBlobID(blobPieces[0].GetBlobId()); UNIT_ASSERT_VALUES_EQUAL(tabletId, blobId0.TabletID()); UNIT_ASSERT_VALUES_EQUAL(2, blobId0.Generation()); - UNIT_ASSERT_VALUES_EQUAL(5, blobId0.Step()); + UNIT_ASSERT_VALUES_EQUAL(6, blobId0.Step()); UNIT_ASSERT_VALUES_EQUAL(4, blobId0.Channel()); UNIT_ASSERT_VALUES_EQUAL(0, blobId0.Cookie()); UNIT_ASSERT_VALUES_EQUAL(0, blobId0.PartId()); @@ -5586,6 +5586,84 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) } } + // Records the first EvCollectGarbage for the tablet before and after a reboot: + // barrier gen must equal record gen at first, then be exactly one higher. + TABLET_TEST(ShouldNotCollectGarbageWithPreviousGeneration) + { + const auto block = tabletConfig.BlockSize; + + NProto::TStorageConfig storageConfig; + storageConfig.SetCompactionThreshold(999'999); + storageConfig.SetCleanupThreshold(999'999); + storageConfig.SetWriteBlobThreshold(block); + storageConfig.SetCollectGarbageThreshold(block); + + TTestEnv env({}, std::move(storageConfig)); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + ui64 tabletId = env.BootIndexTablet(nodeIdx); + + ui32 barrierGen = 0; + ui32 perGenerationCounter = 0; + ui32 recordGeneration = 0; + ui32 collectStep = 0; + bool firstMessageSeen = false; + env.GetRuntime().SetEventFilter([&] (auto& runtime, auto& event) { + Y_UNUSED(runtime); + + switch (event->GetTypeRewrite()) { + case TEvBlobStorage::EvCollectGarbage: { + const auto* msg = + event->template Get<TEvBlobStorage::TEvCollectGarbage>(); + if (msg->TabletId == tabletId && !firstMessageSeen) { + barrierGen = 
msg->CollectGeneration; + perGenerationCounter = msg->PerGenerationCounter; + recordGeneration = msg->RecordGeneration; + collectStep = msg->CollectStep; + firstMessageSeen = true; + return true; + } + } + } + + return false; + }); + + TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId); + tablet.InitSession("client", "session"); + + auto id = CreateNode(tablet, TCreateNodeArgs::File(RootNodeId, "test")); + auto handle = CreateHandle(tablet, id); + + // directly written blob + tablet.WriteData(handle, 0, block, 'c'); + + TDispatchOptions options; + options.CustomFinalCondition = [&] { + return barrierGen; + }; + env.GetRuntime().DispatchEvents(options); + + UNIT_ASSERT_VALUES_EQUAL(recordGeneration, barrierGen); + + auto oldBarrierGen = barrierGen; + // reset the captured values so that the filter records the next message + barrierGen = 0; + perGenerationCounter = 0; + recordGeneration = 0; + collectStep = 0; + firstMessageSeen = false; + + tablet.RebootTablet(); + + env.GetRuntime().DispatchEvents(options); + + UNIT_ASSERT_VALUES_EQUAL(0, collectStep); + UNIT_ASSERT_VALUES_EQUAL(oldBarrierGen + 1, barrierGen); + UNIT_ASSERT_VALUES_EQUAL(1, perGenerationCounter); + } + #undef TABLET_TEST }