From 44cd61672757e3d4d71b794f5f593e570bce7131 Mon Sep 17 00:00:00 2001 From: Andrei Strelkovskii Date: Fri, 20 Sep 2024 14:34:28 +0300 Subject: [PATCH] issue-1993: resetting CurrentBackgroundBlobIndexOp if FlushBytes couldn't be enqueued due to Flush being already enqueued or running; enqueueing Flush and BlobIndexOp upon backpressure error; tracking BackpressurePeriod and rebooting tablets if this period is too long; added more backpressure info to monpage (#2080) * issue-1993: enqueueing BlobIndexOp upon Flush shortcut, enqueueing Flush and BlobIndexOp upon backpressure error, tracking BackpressurePeriod and rebooting tablets if this period is too long, added more backpressure info to monpage * issue-1993: better * issue-1993: properly resetting CurrentBackgroundBlobIndexOp * issue-1993: properly resetting CurrentBackgroundBlobIndexOp - fixed 128_KB ut * issue-1993: better --- cloud/blockstore/config/storage.proto | 8 ++ cloud/filestore/config/storage.proto | 14 +++ cloud/filestore/libs/storage/core/config.cpp | 3 +- cloud/filestore/libs/storage/core/config.h | 1 + .../libs/storage/tablet/tablet_actor.cpp | 25 +++++- .../libs/storage/tablet/tablet_actor.h | 1 + .../tablet/tablet_actor_compaction.cpp | 1 + .../tablet/tablet_actor_monitoring.cpp | 20 ++++- .../storage/tablet/tablet_actor_writedata.cpp | 4 - .../libs/storage/tablet/tablet_ut_data.cpp | 89 ++++++++++++++++++- 10 files changed, 155 insertions(+), 11 deletions(-) diff --git a/cloud/blockstore/config/storage.proto b/cloud/blockstore/config/storage.proto index 00505917309..9af13b640b5 100644 --- a/cloud/blockstore/config/storage.proto +++ b/cloud/blockstore/config/storage.proto @@ -46,6 +46,14 @@ enum EVolumePreemptionType PREEMPTION_MOVE_LEAST_HEAVY = 2; } +//////////////////////////////////////////////////////////////////////////////// +// +// !!!IMPORTANT!!! +// Even though StorageServiceConfig is usually stored in textual format, it may +// be overridden for some tablets and in this case it's stored in binary format +// in the tablet's localdb. So binary backward-compatibility should be +// maintained. +// //////////////////////////////////////////////////////////////////////////////// message TStorageServiceConfig diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index 59a8ec64ce0..bbb09da9690 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -16,6 +16,14 @@ enum EBlobIndexOpsPriority BIOP_FAIR = 2; } +//////////////////////////////////////////////////////////////////////////////// +// +// !!!IMPORTANT!!! +// Even though StorageConfig is usually stored in textual format, it may be +// overridden for some tablets and in this case it's stored in binary format +// in the tablet's localdb. So binary backward-compatibility should be +// maintained. +// //////////////////////////////////////////////////////////////////////////////// message TStorageConfig @@ -402,4 +410,10 @@ message TStorageConfig // uncontrollable behaviour change for production systems. TODO: gradually // enable this flag everywhere and make this behaviour the new default. optional bool UseMixedBlocksInsteadOfAliveBlocksInCompaction = 391; + + // IndexTabletActor will suicide (and thus reboot) after observing + // consecutive backpressure errors for this period of time. Needed to + // automatically recover after various races that may happen during index + // tablet startup due to bugs. + optional uint32 MaxBackpressurePeriodBeforeSuicide = 392; // in ms } diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index 90f556a4209..d25335010c0 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -170,7 +170,8 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(NegativeEntryTimeout, TDuration, TDuration::Zero() )\ xxx(AttrTimeout, TDuration, TDuration::Zero() )\ xxx(MaxOutOfOrderCompactionMapLoadRequestsInQueue, ui32, 5 )\ - xxx(MaxBackpressureErrorsBeforeSuicide, ui32, 1000 )\ + xxx(MaxBackpressureErrorsBeforeSuicide, ui32, 1000 )\ + xxx(MaxBackpressurePeriodBeforeSuicide, TDuration, TDuration::Minutes(10))\ \ xxx(NewLocalDBCompactionPolicyEnabled, bool, false )\ \ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index 568029cecee..d77c8d27c07 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -204,6 +204,7 @@ class TStorageConfig bool GetConfigsDispatcherServiceEnabled() const; ui32 GetMaxBackpressureErrorsBeforeSuicide() const; + TDuration GetMaxBackpressurePeriodBeforeSuicide() const; TDuration GetGenerateBlobIdsReleaseCollectBarrierTimeout() const; diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp index 6a2b8a3bc84..a148082bd22 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp @@ -359,9 +359,20 @@ NProto::TError TIndexTabletActor::ValidateWriteRequest( EnqueueFlushIfNeeded(ctx); EnqueueBlobIndexOpIfNeeded(ctx); - if (CompactionStateLoadStatus.Finished && - ++BackpressureErrorCount >= - Config->GetMaxBackpressureErrorsBeforeSuicide()) + TDuration backpressurePeriod; + if (CompactionStateLoadStatus.Finished) { + if (!BackpressurePeriodStart) { + BackpressurePeriodStart = ctx.Now(); + } + + ++BackpressureErrorCount; + backpressurePeriod = ctx.Now() - BackpressurePeriodStart; + } + + if (BackpressureErrorCount >= + Config->GetMaxBackpressureErrorsBeforeSuicide() + || backpressurePeriod >= + Config->GetMaxBackpressurePeriodBeforeSuicide()) { LOG_WARN( ctx, @@ -373,11 +384,19 @@ NProto::TError TIndexTabletActor::ValidateWriteRequest( Suicide(ctx); } + EnqueueFlushIfNeeded(ctx); + EnqueueBlobIndexOpIfNeeded(ctx); + return MakeError( E_REJECTED, TStringBuilder() << "rejected due to backpressure: " << message); } + // this request passed the backpressure check => tablet is not stuck + // anywhere, we can reset our backpressure error counter + BackpressureErrorCount = 0; + BackpressurePeriodStart = TInstant::Zero(); + return NProto::TError{}; } diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor.h b/cloud/filestore/libs/storage/tablet/tablet_actor.h index cef15e7de1a..ef2af6bf376 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor.h +++ b/cloud/filestore/libs/storage/tablet/tablet_actor.h @@ -258,6 +258,7 @@ class TIndexTabletActor final NProto::TStorageConfig StorageConfigOverride; ui32 BackpressureErrorCount = 0; + TInstant BackpressurePeriodStart; const NBlockCodecs::ICodec* BlobCodec; diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_compaction.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_compaction.cpp index 967155b6603..4f05c8288ea 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_compaction.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_compaction.cpp @@ -409,6 +409,7 @@ void TIndexTabletActor::EnqueueBlobIndexOpIfNeeded(const TActorContext& ctx) // Flush blocked since FlushBytes op rewrites some fresh blocks as // blobs if (!FlushState.Enqueue()) { + StartBackgroundBlobIndexOp(); CompleteBlobIndexOp(); if (!IsBlobIndexOpsQueueEmpty()) { EnqueueBlobIndexOpIfNeeded(ctx); diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_monitoring.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_monitoring.cpp index 01c198b4b65..4ee4a1fc41c 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_monitoring.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_monitoring.cpp @@ -1066,9 +1066,25 @@ void TIndexTabletActor::HandleHttpInfo_Default( &message); TAG(TH3) { out << "Backpressure"; } if (!isWriteAllowed) { - out << "
" << "Write not allowed: " << message << "
"; + DIV_CLASS("alert alert-danger") { + out << "Write NOT allowed: " << message; + } } else { - out << "
Write allowed
"; + DIV_CLASS("alert") { + out << "Write allowed: " << message; + } + } + if (BackpressurePeriodStart || BackpressureErrorCount) { + DIV_CLASS("alert") { + out << "Backpressure errors: " << BackpressureErrorCount; + } + DIV_CLASS("alert") { + out << "Backpressure period start: " << BackpressurePeriodStart; + } + DIV_CLASS("alert") { + out << "Backpressure period: " + << (ctx.Now() - BackpressurePeriodStart); + } } TABLE_CLASS("table table-bordered") { diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_writedata.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_writedata.cpp index bf0e4760d9b..8336bcd9c12 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_writedata.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_writedata.cpp @@ -105,10 +105,6 @@ void TIndexTabletActor::HandleWriteData( return; } - // this request passed the backpressure check => tablet is not stuck - // anywhere, we can reset our backpressure error counter - BackpressureErrorCount = 0; - // either rejected or put into queue if (ThrottleIfNeeded(ev, ctx)) { return; diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp index 58664599484..478586de12f 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp @@ -2672,7 +2672,8 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) storageConfig.SetCleanupThresholdForBackpressure(999'999); storageConfig.SetFlushBytesThresholdForBackpressure(1_GB); storageConfig.SetFlushThresholdForBackpressure(block); - storageConfig.SetMaxBackpressureErrorsBeforeSuicide(2); + storageConfig.SetMaxBackpressureErrorsBeforeSuicide(999999); + storageConfig.SetMaxBackpressurePeriodBeforeSuicide(60'000); // 1m TTestEnv env({}, std::move(storageConfig)); env.CreateSubDomain("nfs"); @@ -2713,6 +2714,20 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) return false; }); + // backpressure due to FlushThresholdForBackpressure + // should not cause tablet reboot since the backpressure period hasn't + // passed yet + tablet.SendWriteDataRequest(handle, 0, block, '0'); + { + auto response = tablet.RecvWriteDataResponse(); + UNIT_ASSERT_VALUES_EQUAL(E_REJECTED, response->GetStatus()); + } + + env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1)); + UNIT_ASSERT(!poisonPillObserved); + + env.GetRuntime().AdvanceCurrentTime(TDuration::Minutes(1)); + // backpressure due to FlushThresholdForBackpressure // should cause tablet reboot tablet.SendWriteDataRequest(handle, 0, block, '0'); @@ -2725,6 +2740,78 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) UNIT_ASSERT(poisonPillObserved); } + TABLET_TEST(FlushBytesShouldReenqueueAfterAttemptToEnqueueItDuringFlush) + { + const auto block = tabletConfig.BlockSize; + + NProto::TStorageConfig storageConfig; + storageConfig.SetWriteBlobThreshold(1_GB); + storageConfig.SetCompactionThreshold(999'999); + storageConfig.SetCleanupThreshold(999'999); + storageConfig.SetFlushThreshold(2 * block); + storageConfig.SetFlushBytesThreshold(block); + + TTestEnv env({}, std::move(storageConfig)); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + ui64 tabletId = env.BootIndexTablet(nodeIdx); + + TIndexTabletClient tablet( + env.GetRuntime(), + nodeIdx, + tabletId, + tabletConfig); + tablet.InitSession("client", "session"); + + auto id = CreateNode(tablet, TCreateNodeArgs::File(RootNodeId, "test")); + ui64 handle = CreateHandle(tablet, id); + + TAutoPtr flush; + TAutoPtr flushBytesCompletion; + + env.GetRuntime().SetEventFilter( + [&] (auto& runtime, TAutoPtr& event) + { + Y_UNUSED(runtime); + + switch (event->GetTypeRewrite()) { + case TEvIndexTabletPrivate::EvFlushRequest: { + UNIT_ASSERT(!flush); + flush = event.Release(); + return true; + } + + case TEvIndexTabletPrivate::EvFlushBytesCompleted: { + UNIT_ASSERT(!flushBytesCompletion); + flushBytesCompletion = event.Release(); + return true; + } + } + + return false; + }); + + // triggering Flush + tablet.WriteData(handle, 0, 2 * block, '0'); // 2 fresh blocks + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + // Flush caught + UNIT_ASSERT(flush); + + // triggering FlushBytes + // FlushBytes shouldn't get triggered since Flush is already enqueued + tablet.WriteData(handle, 0, block / 2, '0'); // fresh bytes + tablet.WriteData(handle, 0, block / 2, '0'); // fresh bytes + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + + // releasing Flush + env.GetRuntime().Send(flush.Release(), 1 /* node index */); + env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1)); + + // FlushBytes should get enqueued and should successfully run and finish + UNIT_ASSERT(flushBytesCompletion); + } + TABLET_TEST(ShouldReadUnAligned) { const auto block = tabletConfig.BlockSize;