diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index 80b4dcfa8be..7e205983a50 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -307,6 +307,9 @@ message TStorageConfig // appropriate. optional bool GetNodeAttrBatchEnabled = 358; + // Max number of fresh bytes items to delete per TrimBytes transaction. + optional uint64 TrimBytesItemCount = 359; + // auth token for node registration via ydb discovery api. optional string NodeRegistrationToken = 360; diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index 6a80287caba..81c0e64c7fa 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -168,6 +168,7 @@ namespace { xxx(MultiTabletForwardingEnabled, bool, false )\ xxx(GetNodeAttrBatchEnabled, bool, false )\ xxx(AllowFileStoreForceDestroy, bool, false )\ + xxx(TrimBytesItemCount, ui64, 100'000 )\ xxx(NodeRegistrationRootCertsFile, TString, {} )\ xxx(NodeRegistrationCert, TCertificate, {} )\ xxx(NodeRegistrationToken, TString, "root@builtin")\ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index ce99304f5eb..33faf58f36c 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -221,6 +221,8 @@ class TStorageConfig bool GetAllowFileStoreForceDestroy() const; + ui64 GetTrimBytesItemCount() const; + void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; void DumpOverridesHtml(IOutputStream& out) const; diff --git a/cloud/filestore/libs/storage/tablet/model/fresh_bytes.cpp b/cloud/filestore/libs/storage/tablet/model/fresh_bytes.cpp index 459c3c86fa5..3ed2cd16dca 100644 --- a/cloud/filestore/libs/storage/tablet/model/fresh_bytes.cpp +++ b/cloud/filestore/libs/storage/tablet/model/fresh_bytes.cpp @@ -1,4 +1,5 @@ #include "fresh_bytes.h" +#include "verify.h" #include @@ -44,14
+45,14 @@ void TFreshBytes::DeleteBytes( if (lo->second.Offset < offset) { // cutting lo from the right side - Y_DEBUG_ABORT_UNLESS(lo->second.CommitId < commitId); + TABLET_VERIFY_DEBUG(lo->second.CommitId < commitId); auto& loRef = c.Refs[{nodeId, offset}]; loRef = lo->second; loRef.Buf = loRef.Buf.substr(0, offset - loRef.Offset); if (lo->first.End <= end) { // blockRange is not contained strictly inside lo - Y_DEBUG_ABORT_UNLESS(lo != hi); + TABLET_VERIFY_DEBUG(lo != hi); c.Refs.erase(lo++); } } @@ -62,7 +63,7 @@ void TFreshBytes::DeleteBytes( { // cutting hi from the left side // hi might be equal to lo - it's not a problem - Y_DEBUG_ABORT_UNLESS(hi->second.CommitId < commitId); + TABLET_VERIFY_DEBUG(hi->second.CommitId < commitId); const auto shift = end - hi->second.Offset; hi->second.Buf = hi->second.Buf.substr( shift, @@ -88,7 +89,7 @@ void TFreshBytes::AddBytes( if (c.FirstCommitId == InvalidCommitId) { c.FirstCommitId = commitId; } else { - Y_ABORT_UNLESS(commitId >= c.FirstCommitId); + TABLET_VERIFY(commitId >= c.FirstCommitId); } TByteVector buffer(Reserve(data.size()), Allocator); @@ -116,7 +117,7 @@ void TFreshBytes::AddDeletionMarker( { auto& c = Chunks.back(); if (c.FirstCommitId != InvalidCommitId) { - Y_ABORT_UNLESS(commitId >= c.FirstCommitId); + TABLET_VERIFY(commitId >= c.FirstCommitId); } c.TotalDeletedBytes += len; @@ -132,15 +133,15 @@ void TFreshBytes::AddDeletionMarker( void TFreshBytes::Barrier(ui64 commitId) { - Y_ABORT_UNLESS(!Chunks.empty()); + TABLET_VERIFY(!Chunks.empty()); auto& c = Chunks.back(); if (!c.Data.empty() || !c.DeletionMarkers.empty()) { if (!c.Data.empty()) { - Y_ABORT_UNLESS(c.Data.back().Descriptor.MinCommitId <= commitId); + TABLET_VERIFY(c.Data.back().Descriptor.MinCommitId <= commitId); } if (!c.DeletionMarkers.empty()) { - Y_ABORT_UNLESS(c.DeletionMarkers.back().MinCommitId <= commitId); + TABLET_VERIFY(c.DeletionMarkers.back().MinCommitId <= commitId); } Chunks.back().ClosingCommitId = commitId; 
Chunks.emplace_back(Allocator);
@@ -173,23 +174,74 @@ TFlushBytesCleanupInfo TFreshBytes::StartCleanup(
     return {Chunks.front().Id, Chunks.front().ClosingCommitId};
 }
 
-void TFreshBytes::VisitTop(const TChunkVisitor& visitor)
+// Visits at most itemLimit items of the oldest chunk: data entries first
+// (second visitor argument is false), then deletion markers (true).
+void TFreshBytes::VisitTop(
+    ui64 itemLimit,
+    const TChunkVisitor& visitor)
 {
+    // A zero limit would visit nothing, so a caller that repeats
+    // VisitTop/FinishCleanup until the chunk is gone would never make
+    // progress. Always allow at least one item.
+    ui64 budget = itemLimit ? itemLimit : 1;
+
     for (const auto& e: Chunks.front().Data) {
+        if (budget == 0) {
+            return;
+        }
+        --budget;
         visitor(e.Descriptor, false);
     }
 
     for (const auto& descriptor: Chunks.front().DeletionMarkers) {
+        if (budget == 0) {
+            return;
+        }
+        --budget;
         visitor(descriptor, true);
     }
 }
 
-void TFreshBytes::FinishCleanup(ui64 chunkId)
+// Drops the first dataItemCount data entries and the first
+// deletionMarkerCount deletion markers of the oldest chunk. Returns true and
+// removes the chunk itself if the counts cover all of its items; otherwise
+// erases just those items and returns false, leaving the rest for another
+// VisitTop/FinishCleanup round.
+bool TFreshBytes::FinishCleanup(
+    ui64 chunkId,
+    ui64 dataItemCount,
+    ui64 deletionMarkerCount)
 {
-    Y_ABORT_UNLESS(Chunks.size() > 1);
-    Y_ABORT_UNLESS(Chunks.front().Id == chunkId);
+    TABLET_VERIFY(Chunks.size() > 1);
+    TABLET_VERIFY(Chunks.front().Id == chunkId);
+
+    auto& chunk = Chunks.front();
+
+    const auto dataSize = chunk.Data.size();
+    const auto deletionSize = chunk.DeletionMarkers.size();
+    if (dataItemCount == dataSize && deletionMarkerCount == deletionSize) {
+        Chunks.pop_front();
+        return true;
+    }
+
+    // The caller must not report more items than the chunk holds.
+    const auto check =
+        dataItemCount <= dataSize && deletionMarkerCount <= deletionSize;
+    TABLET_VERIFY(check);
+
+    // NOTE(review): only the item lists are trimmed here; per-chunk state
+    // such as TotalDeletedBytes and Refs is left as is until the chunk is
+    // popped. Confirm that the totals may lag and that no Refs entry can
+    // still view a buffer owned by an erased Data item.
+    chunk.Data.erase(
+        chunk.Data.begin(),
+        std::next(chunk.Data.begin(), dataItemCount));
+
+    chunk.DeletionMarkers.erase(
+        chunk.DeletionMarkers.begin(),
+        std::next(chunk.DeletionMarkers.begin(), deletionMarkerCount));
 
-    Chunks.pop_front();
+    return false;
 }
 
 void TFreshBytes::FindBytes(
diff --git a/cloud/filestore/libs/storage/tablet/model/fresh_bytes.h b/cloud/filestore/libs/storage/tablet/model/fresh_bytes.h
index 7f817a5a7a2..e97b1e77e5d 100644
--- a/cloud/filestore/libs/storage/tablet/model/fresh_bytes.h
+++ b/cloud/filestore/libs/storage/tablet/model/fresh_bytes.h
@@ -78,6 +78,7 @@ class TFreshBytes
     IAllocator* Allocator;
     TDeque Chunks;
     ui64 LastChunkId = 0;
+    TString LogTag;
 
 public:
     TFreshBytes(IAllocator* allocator);
@@ -94,6 +95,11 @@ class TFreshBytes return std::make_pair(bytes, deletedBytes); } + void UpdateLogTag(TString logTag) + { + LogTag = std::move(logTag); + } + void AddBytes(ui64 nodeId, ui64 offset, TStringBuf data, ui64 commitId); void AddDeletionMarker(ui64 nodeId, ui64 offset, ui64 len, ui64 commitId); @@ -103,8 +109,11 @@ class TFreshBytes ui64 commitId, TVector* entries, TVector* deletionMarkers); - void VisitTop(const TChunkVisitor& visitor); - void FinishCleanup(ui64 chunkId); + void VisitTop(ui64 itemLimit, const TChunkVisitor& visitor); + bool FinishCleanup( + ui64 chunkId, + ui64 dataItemCount, + ui64 deletionMarkerCount); void FindBytes( IFreshBytesVisitor& visitor, diff --git a/cloud/filestore/libs/storage/tablet/model/fresh_bytes_ut.cpp b/cloud/filestore/libs/storage/tablet/model/fresh_bytes_ut.cpp index 412615bf034..efb5b53ac0c 100644 --- a/cloud/filestore/libs/storage/tablet/model/fresh_bytes_ut.cpp +++ b/cloud/filestore/libs/storage/tablet/model/fresh_bytes_ut.cpp @@ -124,20 +124,27 @@ Y_UNIT_TEST_SUITE(TFreshBytesTest) auto visitTop = [&] () { visitedBytes.clear(); visitedDeletionMarkers.clear(); - freshBytes.VisitTop([&] (const TBytes& bytes, bool isDel) { - if (isDel) { - visitedDeletionMarkers.push_back(bytes); - } else { - visitedBytes.push_back(bytes); + constexpr ui64 itemLimit = 100; + freshBytes.VisitTop( + itemLimit, + [&] (const TBytes& bytes, bool isDel) { + if (isDel) { + visitedDeletionMarkers.push_back(bytes); + } else { + visitedBytes.push_back(bytes); + } } - }); + ); }; visitTop(); COMPARE_BYTES(bytes, visitedBytes); COMPARE_BYTES(deletionMarkers, visitedDeletionMarkers); - freshBytes.FinishCleanup(info.ChunkId); + UNIT_ASSERT(freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); { TFreshBytesVisitor visitor; @@ -172,7 +179,10 @@ Y_UNIT_TEST_SUITE(TFreshBytesTest) COMPARE_BYTES(bytes, visitedBytes); COMPARE_BYTES(deletionMarkers, visitedDeletionMarkers); - 
freshBytes.FinishCleanup(info.ChunkId); + UNIT_ASSERT(freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); } Y_UNIT_TEST(ShouldInsertIntervalInTheMiddleOfAnotherInterval) @@ -226,6 +236,132 @@ Y_UNIT_TEST_SUITE(TFreshBytesTest) } } + Y_UNIT_TEST(ShouldObeyLimitProvidedForVisitTop) + { + TFreshBytes freshBytes(TDefaultAllocator::Instance()); + + freshBytes.AddBytes(1, 100, "aAa", 10); + freshBytes.AddBytes(1, 101, "bBbB", 11); + freshBytes.AddBytes(1, 50, "cCc", 12); + freshBytes.AddBytes(1, 50, "dDd", 13); + freshBytes.AddBytes(2, 100, "eEeEe", 14); + freshBytes.AddBytes(2, 1000, "fFf", 15); + freshBytes.AddDeletionMarker(2, 100, 3, 16); + + TVector bytes; + TVector deletionMarkers; + auto info = freshBytes.StartCleanup(17, &bytes, &deletionMarkers); + + COMPARE_BYTES( + TVector({ + {1, 100, 3, 10, InvalidCommitId}, + {1, 101, 4, 11, InvalidCommitId}, + {1, 50, 3, 12, InvalidCommitId}, + {1, 50, 3, 13, InvalidCommitId}, + {2, 100, 5, 14, InvalidCommitId}, + {2, 1000, 3, 15, InvalidCommitId}, + }), bytes); + COMPARE_BYTES( + TVector({ + {2, 100, 3, 16, InvalidCommitId}, + }), deletionMarkers); + UNIT_ASSERT_VALUES_EQUAL(17, info.ClosingCommitId); + + TVector visitedBytes; + TVector visitedDeletionMarkers; + + constexpr ui64 itemsLimit = 2; + + auto visitTop = [&] () { + visitedBytes.clear(); + visitedDeletionMarkers.clear(); + freshBytes.VisitTop( + itemsLimit, + [&] (const TBytes& bytes, bool isDel) { + if (isDel) { + visitedDeletionMarkers.push_back(bytes); + } else { + visitedBytes.push_back(bytes); + } + } + ); + }; + + { + TVector expectedBytes { + {1, 100, 3, 10, InvalidCommitId}, + {1, 101, 4, 11, InvalidCommitId}, + }; + TVector expectedDeletionMarkers; + + visitTop(); + COMPARE_BYTES(expectedBytes, visitedBytes); + COMPARE_BYTES(expectedDeletionMarkers, visitedDeletionMarkers); + + UNIT_ASSERT(!freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); + } + + { + 
TVector expectedBytes { + {1, 50, 3, 12, InvalidCommitId}, + {1, 50, 3, 13, InvalidCommitId}, + }; + TVector expectedDeletionMarkers; + + visitTop(); + COMPARE_BYTES(expectedBytes, visitedBytes); + COMPARE_BYTES(expectedDeletionMarkers, visitedDeletionMarkers); + + UNIT_ASSERT(!freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); + } + + { + TVector expectedBytes { + {2, 100, 5, 14, InvalidCommitId}, + {2, 1000, 3, 15, InvalidCommitId}, + }; + TVector expectedDeletionMarkers; + + visitTop(); + COMPARE_BYTES(expectedBytes, visitedBytes); + COMPARE_BYTES(expectedDeletionMarkers, visitedDeletionMarkers); + + UNIT_ASSERT(!freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); + } + + { + TVector expectedBytes; + TVector expectedDeletionMarkers { + {2, 100, 3, 16, InvalidCommitId}, + }; + + visitTop(); + COMPARE_BYTES(expectedBytes, visitedBytes); + COMPARE_BYTES(expectedDeletionMarkers, visitedDeletionMarkers); + + UNIT_ASSERT(freshBytes.FinishCleanup( + info.ChunkId, + visitedBytes.size(), + visitedDeletionMarkers.size())); + } + + { + TFreshBytesVisitor visitor; + freshBytes.FindBytes(visitor, 1, TByteRange(0, 1000, 4_KB), 14); + COMPARE_BYTES(TVector(), visitor.Bytes); + UNIT_ASSERT_VALUES_EQUAL(TString(), visitor.Data); + } + } + // TODO test all branches of AddBytes // TODO test with multiple chunks diff --git a/cloud/filestore/libs/storage/tablet/model/verify.h b/cloud/filestore/libs/storage/tablet/model/verify.h index fbbda464b15..12703660e5b 100644 --- a/cloud/filestore/libs/storage/tablet/model/verify.h +++ b/cloud/filestore/libs/storage/tablet/model/verify.h @@ -15,3 +15,15 @@ #define TABLET_VERIFY(expr) \ TABLET_VERIFY_C(expr, "") \ // TABLET_VERIFY + +#define TABLET_VERIFY_DEBUG_C(expr, message) \ + STORAGE_VERIFY_DEBUG_C( \ + expr, \ + TWellKnownEntityTypes::FILESYSTEM, \ + LogTag, \ + message) \ +// TABLET_VERIFY_DEBUG_C + +#define TABLET_VERIFY_DEBUG(expr) \ + 
TABLET_VERIFY_DEBUG_C(expr, "") \ +// TABLET_VERIFY_DEBUG diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_flush_bytes.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_flush_bytes.cpp index acf0fa4e2fd..db6ee06f88a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_flush_bytes.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_flush_bytes.cpp @@ -901,8 +901,14 @@ void TIndexTabletActor::ExecuteTx_TrimBytes( TIndexTabletDatabase db(tx.DB); - args.TrimmedBytes = - FinishFlushBytes(db, args.ChunkId, args.ProfileLogRequest); + auto result = FinishFlushBytes( + db, + Config->GetTrimBytesItemCount(), + args.ChunkId, + args.ProfileLogRequest); + + args.TrimmedBytes = result.TotalBytesFlushed; + args.TrimmedAll = result.ChunkCompleted; } void TIndexTabletActor::CompleteTx_TrimBytes( @@ -917,28 +923,42 @@ void TIndexTabletActor::CompleteTx_TrimBytes( {}, ProfileLog); + FILESTORE_TRACK( + ResponseSent_Tablet, + args.RequestInfo->CallContext, + "TrimBytes"); + + Metrics.TrimBytes.Update( + 1, + args.TrimmedBytes, + ctx.Now() - args.RequestInfo->StartedTs); + + if (!args.TrimmedAll) { + LOG_DEBUG(ctx, TFileStoreComponents::TABLET, + "%s TrimBytes partially completed (%lu, %lu)", + LogTag.c_str(), + args.ChunkId, + args.TrimmedBytes); + + ExecuteTx( + ctx, + args.RequestInfo, + args.ChunkId); + return; + } + LOG_DEBUG(ctx, TFileStoreComponents::TABLET, "%s TrimBytes completed (%lu, %lu)", LogTag.c_str(), args.ChunkId, args.TrimmedBytes); - FILESTORE_TRACK( - ResponseSent_Tablet, - args.RequestInfo->CallContext, - "TrimBytes"); - BlobIndexOpState.Complete(); FlushState.Complete(); EnqueueBlobIndexOpIfNeeded(ctx); EnqueueCollectGarbageIfNeeded(ctx); EnqueueFlushIfNeeded(ctx); - - Metrics.TrimBytes.Update( - 1, - args.TrimmedBytes, - ctx.Now() - args.RequestInfo->StartedTs); } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.cpp b/cloud/filestore/libs/storage/tablet/tablet_state.cpp 
index ac39a29cc63..9bfb430d0cf 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state.cpp @@ -72,6 +72,7 @@ TIndexTabletState::~TIndexTabletState() void TIndexTabletState::UpdateLogTag(TString tag) { + Impl->FreshBytes.UpdateLogTag(tag); LogTag = std::move(tag); } diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.h b/cloud/filestore/libs/storage/tablet/tablet_state.h index c6c362526b4..4ddea4140b0 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.h +++ b/cloud/filestore/libs/storage/tablet/tablet_state.h @@ -116,6 +116,12 @@ struct TCleanupInfo } }; +struct TFlushBytesStats +{ + ui64 TotalBytesFlushed = 0; + bool ChunkCompleted = false; +}; + //////////////////////////////////////////////////////////////////////////////// class TIndexTabletState @@ -717,8 +723,9 @@ FILESTORE_DUPCACHE_REQUESTS(FILESTORE_DECLARE_DUPCACHE) TFlushBytesCleanupInfo StartFlushBytes( TVector* bytes, TVector* deletionMarkers); - ui64 FinishFlushBytes( + TFlushBytesStats FinishFlushBytes( TIndexTabletDatabase& db, + ui64 itemLimit, ui64 chunkId, NProto::TProfileLogRequestInfo& profileLogRequest); diff --git a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp index 613a4eef54a..73722aefa8d 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp @@ -366,34 +366,44 @@ TFlushBytesCleanupInfo TIndexTabletState::StartFlushBytes( deletionMarkers); } -ui64 TIndexTabletState::FinishFlushBytes( +TFlushBytesStats TIndexTabletState::FinishFlushBytes( TIndexTabletDatabase& db, + ui64 itemLimit, ui64 chunkId, NProto::TProfileLogRequestInfo& profileLogRequest) { ui64 sz = 0; ui64 deletedSz = 0; - Impl->FreshBytes.VisitTop([&] (const TBytes& bytes, bool isDeletionMarker) { - db.DeleteFreshBytes(bytes.NodeId, bytes.MinCommitId, bytes.Offset); - if (isDeletionMarker) { - 
deletedSz += bytes.Length; - } else { - sz += bytes.Length; - } - - auto* range = profileLogRequest.AddRanges(); - range->SetNodeId(bytes.NodeId); - range->SetOffset(bytes.Offset); - range->SetBytes(bytes.Length); + ui64 cnt = 0; + ui64 deletedCnt = 0; + Impl->FreshBytes.VisitTop( + itemLimit, + [&] (const TBytes& bytes, bool isDeletionMarker) { + db.DeleteFreshBytes(bytes.NodeId, bytes.MinCommitId, bytes.Offset); + if (isDeletionMarker) { + deletedSz += bytes.Length; + ++deletedCnt; + } else { + sz += bytes.Length; + ++cnt; + } + + auto* range = profileLogRequest.AddRanges(); + range->SetNodeId(bytes.NodeId); + range->SetOffset(bytes.Offset); + range->SetBytes(bytes.Length); }); - Impl->FreshBytes.FinishCleanup(chunkId); + auto completed = Impl->FreshBytes.FinishCleanup( + chunkId, + cnt, + deletedCnt); auto [freshBytes, deletedFreshBytes] = Impl->FreshBytes.GetTotalBytes(); SetFreshBytesCount(db, freshBytes); SetDeletedFreshBytesCount(db, deletedFreshBytes); - return sz + deletedSz; + return {sz + deletedSz, completed}; } //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/filestore/libs/storage/tablet/tablet_tx.h b/cloud/filestore/libs/storage/tablet/tablet_tx.h index 8d763973517..8c54d5fb957 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_tx.h +++ b/cloud/filestore/libs/storage/tablet/tablet_tx.h @@ -1558,6 +1558,7 @@ struct TTxIndexTablet const TRequestInfoPtr RequestInfo; const ui64 ChunkId; ui64 TrimmedBytes = 0; + bool TrimmedAll = false; TTrimBytes(TRequestInfoPtr requestInfo, ui64 chunkId) : TProfileAware(EFileStoreSystemRequest::TrimBytes) diff --git a/cloud/storage/core/libs/common/verify.h b/cloud/storage/core/libs/common/verify.h index 1239786075f..c62fb3102fa 100644 --- a/cloud/storage/core/libs/common/verify.h +++ b/cloud/storage/core/libs/common/verify.h @@ -59,7 +59,7 @@ struct TWellKnownEntityTypes TStringBuilder() << entityType \ << "\t" << entityId << "\n"); \ } \ - Y_ABORT("%s", 
sb.c_str()); \ + Y_ABORT("%s", sb.c_str()); \ } \ } while (false) \ // STORAGE_VERIFY_C @@ -67,3 +67,11 @@ struct TWellKnownEntityTypes #define STORAGE_VERIFY(expr, entityType, entityId) \ STORAGE_VERIFY_C(expr, entityType, entityId, ""); \ // STORAGE_VERIFY + +#ifndef NDEBUG + #define STORAGE_VERIFY_DEBUG STORAGE_VERIFY + #define STORAGE_VERIFY_DEBUG_C STORAGE_VERIFY_C +#else + #define STORAGE_VERIFY_DEBUG Y_DEBUG_ABORT_UNLESS + #define STORAGE_VERIFY_DEBUG_C Y_DEBUG_ABORT_UNLESS +#endif