Skip to content

Commit

Permalink
issue #95 NBSNEBIUS-70: if Flush/Compaction/Cleanup/FlushBytes ops are enqueued before CompactionMap is completely loaded, their EOperationStates should not get stuck in the Enqueued state (#303)
Browse files Browse the repository at this point in the history
  • Loading branch information
qkrorlqr committed Feb 1, 2024
1 parent f9c7797 commit a8c4716
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor_cleanup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ void TIndexTabletActor::HandleCleanup(
};

if (!CompactionStateLoadStatus.Finished) {
if (BlobIndexOpState.GetOperationState() == EOperationState::Enqueued) {
BlobIndexOpState.Complete();
}

replyError(MakeError(E_TRY_AGAIN, "compaction state not loaded yet"));
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,10 @@ void TIndexTabletActor::HandleCompaction(
};

if (!CompactionStateLoadStatus.Finished) {
if (BlobIndexOpState.GetOperationState() == EOperationState::Enqueued) {
BlobIndexOpState.Complete();
}

replyError(MakeError(E_TRY_AGAIN, "compaction state not loaded yet"));
return;
}
Expand Down
6 changes: 6 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor_counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,12 @@ void TIndexTabletActor::HandleGetStorageStats(
}
}

stats->SetFlushState(static_cast<ui32>(FlushState.GetOperationState()));
stats->SetBlobIndexOpState(static_cast<ui32>(
BlobIndexOpState.GetOperationState()));
stats->SetCollectGarbageState(static_cast<ui32>(
CollectGarbageState.GetOperationState()));

NCloud::Reply(ctx, *ev, std::move(response));
}

Expand Down
4 changes: 4 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor_flush.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@ void TIndexTabletActor::HandleFlush(
};

if (!CompactionStateLoadStatus.Finished) {
if (FlushState.GetOperationState() == EOperationState::Enqueued) {
FlushState.Complete();
}

replyError(
ctx,
*ev,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,10 @@ void TIndexTabletActor::HandleFlushBytes(
};

if (!CompactionStateLoadStatus.Finished) {
if (BlobIndexOpState.GetOperationState() == EOperationState::Enqueued) {
BlobIndexOpState.Complete();
}

replyError(
ctx,
*ev,
Expand Down
121 changes: 121 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "tablet_schema.h"

#include <cloud/filestore/libs/storage/tablet/model/block.h>
#include <cloud/filestore/libs/storage/tablet/model/operation.h>
#include <cloud/filestore/libs/storage/testlib/tablet_client.h>
#include <cloud/filestore/libs/storage/testlib/test_env.h>

Expand Down Expand Up @@ -2882,6 +2883,126 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
tablet.DestroyHandle(handle);
}

TABLET_TEST(BackgroundOperationsShouldNotGetStuckForeverDuringCompactionMapLoading)
{
    // Scenario: after a reboot the second compaction map chunk load request
    // is intercepted and held back. Writes then trigger Flush and Compaction
    // requests, and the operation states reported via GetStorageStats are
    // expected to be Idle both times.
    const auto blockSize = tabletConfig.BlockSize;

    NProto::TStorageConfig storageConfig;
    // Flush is tracked by its own EOperationState
    storageConfig.SetFlushThreshold(1);
    // Compaction shares its EOperationState with Cleanup and FlushBytes,
    // so it is hard to test anything apart from Compaction here
    storageConfig.SetCompactionThreshold(2);
    storageConfig.SetLoadedCompactionRangesPerTx(2);
    storageConfig.SetWriteBlobThreshold(2 * blockSize);

    TTestEnv env({}, std::move(storageConfig));
    env.CreateSubDomain("nfs");

    ui32 nodeIdx = env.CreateNode("nfs");
    ui64 tabletId = env.BootIndexTablet(nodeIdx);

    TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId, tabletConfig);
    tablet.InitSession("client", "session");

    auto nodeId =
        CreateNode(tablet, TCreateNodeArgs::File(RootNodeId, "test"));
    auto handle = CreateHandle(tablet, nodeId);

    // one written block is enough to produce at least one compaction range
    tablet.WriteData(handle, 0, blockSize, 'a');

    TAutoPtr<IEventHandle> heldLoadChunk;
    ui32 loadChunkRequests = 0;
    ui32 flushRequests = 0;
    ui32 compactionRequests = 0;

    // Counts the background op requests and swallows the second chunk load
    // request. The first chunk has to get through so that writes are
    // possible (and thus able to trigger our background ops).
    env.GetRuntime().SetEventFilter([&] (auto& runtime, auto& event) {
        Y_UNUSED(runtime);

        switch (event->GetTypeRewrite()) {
            case TEvIndexTabletPrivate::EvFlushRequest:
                ++flushRequests;
                return false;

            case TEvIndexTabletPrivate::EvCompactionRequest:
                ++compactionRequests;
                return false;

            case TEvIndexTabletPrivate::EvLoadCompactionMapChunkRequest:
                if (++loadChunkRequests != 2) {
                    return false;
                }

                heldLoadChunk = event.Release();
                return true;

            default:
                return false;
        }
    });

    // the reboot is what makes the tablet reload its compaction map
    tablet.RebootTablet();
    tablet.RecoverSession();

    handle = CreateHandle(tablet, nodeId);

    env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1));
    UNIT_ASSERT(heldLoadChunk);
    UNIT_ASSERT_VALUES_EQUAL(2, loadChunkRequests);

    // the targeted range should already be loaded, so the write is expected
    // to succeed
    tablet.SendWriteDataRequest(handle, 0, blockSize, 'a');
    {
        auto writeResponse = tablet.RecvWriteDataResponse();
        UNIT_ASSERT_VALUES_EQUAL(S_OK, writeResponse->GetStatus());
    }

    // exactly one Flush request should have been issued by now, and the
    // Flush operation state should be back to Idle
    UNIT_ASSERT_VALUES_EQUAL(1, flushRequests);

    {
        auto statsResponse = tablet.GetStorageStats();
        const ui32 flushState =
            static_cast<ui32>(statsResponse->Record.GetStats().GetFlushState());
        UNIT_ASSERT_VALUES_EQUAL(
            static_cast<ui32>(EOperationState::Idle),
            flushState);
    }

    // same range again (already loaded), this time with a two-block write
    tablet.SendWriteDataRequest(handle, 0, 2 * blockSize, 'a');
    {
        auto writeResponse = tablet.RecvWriteDataResponse();
        UNIT_ASSERT_VALUES_EQUAL(S_OK, writeResponse->GetStatus());
    }

    // exactly one Compaction request should have been issued by now, and the
    // blob index operation state should be back to Idle
    UNIT_ASSERT_VALUES_EQUAL(1, compactionRequests);

    {
        auto statsResponse = tablet.GetStorageStats();
        const ui32 blobIndexOpState = static_cast<ui32>(
            statsResponse->Record.GetStats().GetBlobIndexOpState());
        UNIT_ASSERT_VALUES_EQUAL(
            static_cast<ui32>(EOperationState::Idle),
            blobIndexOpState);
    }

    tablet.DestroyHandle(handle);
}

#undef TABLET_TEST
}

Expand Down
5 changes: 5 additions & 0 deletions cloud/filestore/private/api/protos/tablet.proto
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ message TStorageStats

// compaction map range stats
repeated TCompactionRangeStats CompactionRangeStats = 3000;

// background operation states
uint32 FlushState = 4001;
uint32 BlobIndexOpState = 4002;
uint32 CollectGarbageState = 4003;
}

message TGetStorageStatsRequest
Expand Down

0 comments on commit a8c4716

Please sign in to comment.