Skip to content

Commit

Permalink
issue-1795: backpressure for LargeDeletionMarkers
Browse files Browse the repository at this point in the history
  • Loading branch information
qkrorlqr committed Sep 26, 2024
1 parent f2598d8 commit f5e8ffb
Show file tree
Hide file tree
Showing 28 changed files with 427 additions and 58 deletions.
4 changes: 4 additions & 0 deletions cloud/filestore/config/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -420,4 +420,8 @@ message TStorageConfig

// settings for ydb config dispatcher service.
optional NCloud.NProto.TConfigDispatcherSettings ConfigDispatcherSettings = 393;

// If the number of blocks marked for deletion via large deletion markers
// exceeds this threshold, large truncate-like operations will be rejected.
optional uint64 LargeDeletionMarkersThresholdForBackpressure = 394;
}
1 change: 1 addition & 0 deletions cloud/filestore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace NCloud::NFileStore{
xxx(AsyncDestroyHandleFailed) \
xxx(DuplicateRequestId) \
xxx(InvalidDupCacheEntry) \
xxx(GeneratedOrphanNode) \
// FILESTORE_CRITICAL_EVENTS

#define FILESTORE_IMPOSSIBLE_EVENTS(xxx) \
Expand Down
11 changes: 6 additions & 5 deletions cloud/filestore/libs/storage/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases;
xxx(MaxBlocksPerTruncateTx, ui32, 0 /*TODO: 32GiB/4KiB*/ )\
xxx(MaxTruncateTxInflight, ui32, 10 )\
\
xxx(MaxFileBlocks, ui32, 300_GB / 4_KB )\
xxx(LargeDeletionMarkersEnabled, bool, false )\
xxx(LargeDeletionMarkerBlocks, ui64, 1_GB / 4_KB )\
xxx(LargeDeletionMarkersThreshold, ui64, 128_GB / 4_KB )\
xxx(LargeDeletionMarkersCleanupThreshold, ui64, 1_TB / 4_KB )\
xxx(MaxFileBlocks, ui32, 300_GB / 4_KB )\
xxx(LargeDeletionMarkersEnabled, bool, false )\
xxx(LargeDeletionMarkerBlocks, ui64, 1_GB / 4_KB )\
xxx(LargeDeletionMarkersThreshold, ui64, 128_GB / 4_KB )\
xxx(LargeDeletionMarkersCleanupThreshold, ui64, 1_TB / 4_KB )\
xxx(LargeDeletionMarkersThresholdForBackpressure, ui64, 10_TB / 4_KB )\
\
xxx(CompactionRetryTimeout, TDuration, TDuration::Seconds(1) )\
xxx(BlobIndexOpsPriority, \
Expand Down
1 change: 1 addition & 0 deletions cloud/filestore/libs/storage/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ class TStorageConfig
ui64 GetLargeDeletionMarkerBlocks() const;
ui64 GetLargeDeletionMarkersThreshold() const;
ui64 GetLargeDeletionMarkersCleanupThreshold() const;
ui64 GetLargeDeletionMarkersThresholdForBackpressure() const;

bool GetMultipleStageRequestThrottlingEnabled() const;

Expand Down
5 changes: 4 additions & 1 deletion cloud/filestore/libs/storage/tablet/tablet_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class TIndexTabletActor final
std::atomic<i64> NodesOpenForReadingBySingleSession{0};
std::atomic<i64> NodesOpenForReadingByMultipleSessions{0};

std::atomic<i64> OrphanNodesCount{0};

NMetrics::TDefaultWindowCalculator MaxUsedQuota{0};
using TLatHistogram =
NMetrics::THistogram<NMetrics::EHistUnit::HU_TIME_MICROSECONDS>;
Expand Down Expand Up @@ -216,7 +218,8 @@ class TIndexTabletActor final
const TChannelsStats& channelsStats,
const TReadAheadCacheStats& readAheadStats,
const TNodeIndexCacheStats& nodeIndexCacheStats,
const TNodeToSessionCounters& nodeToSessionCounters);
const TNodeToSessionCounters& nodeToSessionCounters,
const TMiscNodeStats& miscNodeStats);
} Metrics;

const IProfileLogPtr ProfileLog;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,15 @@ void TIndexTabletActor::ExecuteTx_AllocateData(
if (args.CommitId == InvalidCommitId) {
return RebootTabletOnCommitOverflow(ctx, "AllocateData");
}
ZeroRange(
auto e = ZeroRange(
db,
args.NodeId,
args.CommitId,
TByteRange(args.Offset, minBorder - args.Offset, GetBlockSize()));
if (HasError(e)) {
args.Error = std::move(e);
return;
}
}

if (!needExtend) {
Expand Down
13 changes: 10 additions & 3 deletions cloud/filestore/libs/storage/tablet/tablet_actor_counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ void TIndexTabletActor::TMetrics::Register(
NodesOpenForReadingByMultipleSessions,
EMetricType::MT_ABSOLUTE);

REGISTER_AGGREGATABLE_SUM(OrphanNodesCount, EMetricType::MT_ABSOLUTE);

// Throttling
REGISTER_LOCAL(MaxReadBandwidth, EMetricType::MT_ABSOLUTE);
REGISTER_LOCAL(MaxWriteBandwidth, EMetricType::MT_ABSOLUTE);
Expand Down Expand Up @@ -400,7 +402,8 @@ void TIndexTabletActor::TMetrics::Update(
const TChannelsStats& channelsStats,
const TReadAheadCacheStats& readAheadStats,
const TNodeIndexCacheStats& nodeIndexCacheStats,
const TNodeToSessionCounters& nodeToSessionCounters)
const TNodeToSessionCounters& nodeToSessionCounters,
const TMiscNodeStats& miscNodeStats)
{
const ui32 blockSize = fileSystem.GetBlockSize();

Expand Down Expand Up @@ -472,6 +475,8 @@ void TIndexTabletActor::TMetrics::Update(
NodesOpenForReadingByMultipleSessions,
nodeToSessionCounters.NodesOpenForReadingByMultipleSessions);

Store(OrphanNodesCount, miscNodeStats.OrphanNodesCount);

BusyIdleCalc.OnUpdateStats();
}

Expand Down Expand Up @@ -520,7 +525,8 @@ void TIndexTabletActor::RegisterStatCounters()
CalculateChannelsStats(),
CalculateReadAheadCacheStats(),
CalculateNodeIndexCacheStats(),
GetNodeToSessionCounters());
GetNodeToSessionCounters(),
GetMiscNodeStats());

Metrics.Register(fsId, storageMediaKind);
}
Expand Down Expand Up @@ -566,7 +572,8 @@ void TIndexTabletActor::HandleUpdateCounters(
CalculateChannelsStats(),
CalculateReadAheadCacheStats(),
CalculateNodeIndexCacheStats(),
GetNodeToSessionCounters());
GetNodeToSessionCounters(),
GetMiscNodeStats());
SendMetricsToExecutor(ctx);

UpdateCountersScheduled = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,16 @@ void TIndexTabletActor::ExecuteTx_CreateHandle(
} else if (args.FollowerId.Empty()
&& HasFlag(args.Flags, NProto::TCreateHandleRequest::E_TRUNCATE))
{
Truncate(
auto e = Truncate(
db,
args.TargetNodeId,
args.WriteCommitId,
args.TargetNode->Attrs.GetSize(),
0);
if (HasError(e)) {
args.Error = std::move(e);
return;
}

auto attrs = CopyAttrs(args.TargetNode->Attrs, E_CM_CMTIME);
attrs.SetSize(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,19 @@ void TIndexTabletActor::ExecuteTx_DestroyHandle(
if (args.Node->Attrs.GetLinks() == 0 &&
!HasOpenHandles(args.Node->NodeId))
{
RemoveNode(
auto e = RemoveNode(
db,
*args.Node,
args.Node->MinCommitId,
commitId);

if (HasError(e)) {
WriteOrphanNode(db, TStringBuilder()
<< "DestroyHandle: " << args.SessionId
<< ", Handle: " << args.Request.GetHandle()
<< ", RemoveNode: " << args.Node->NodeId
<< ", Error: " << FormatError(e), args.Node->NodeId);
}
}

EnqueueTruncateIfNeeded(ctx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,18 @@ void TIndexTabletActor::ExecuteTx_DestroySession(

auto it = args.Nodes.find(nodeId);
if (it != args.Nodes.end() && !HasOpenHandles(nodeId)) {
RemoveNode(
auto e = RemoveNode(
db,
*it,
it->MinCommitId,
commitId);

if (HasError(e)) {
WriteOrphanNode(db, TStringBuilder()
<< "DestroySession: " << args.SessionId
<< ", RemoveNode: " << nodeId
<< ", Error: " << FormatError(e), nodeId);
}
}
}

Expand Down
15 changes: 12 additions & 3 deletions cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ bool TIndexTabletActor::PrepareTx_LoadState(
db.ReadSessionHistoryEntries(args.SessionHistory),
db.ReadOpLog(args.OpLog),
db.ReadLargeDeletionMarkers(args.LargeDeletionMarkers),
db.ReadOrphanNodes(args.OrphanNodeIds),
};

bool ready = std::accumulate(
Expand Down Expand Up @@ -231,9 +232,16 @@ void TIndexTabletActor::CompleteTx_LoadState(

LOG_INFO_S(ctx, TFileStoreComponents::TABLET,
LogTag << " Initializing tablet state");
LOG_INFO_S(ctx, TFileStoreComponents::TABLET,
LogTag << " Read " << args.LargeDeletionMarkers.size()
<< " large deletion markers");
if (args.LargeDeletionMarkers) {
LOG_INFO_S(ctx, TFileStoreComponents::TABLET,
LogTag << " Read " << args.LargeDeletionMarkers.size()
<< " large deletion markers");
}
if (args.OrphanNodeIds) {
LOG_INFO_S(ctx, TFileStoreComponents::TABLET,
LogTag << " Read " << args.OrphanNodeIds.size()
<< " orphan nodes");
}

LoadState(
Executor()->Generation(),
Expand All @@ -242,6 +250,7 @@ void TIndexTabletActor::CompleteTx_LoadState(
args.FileSystemStats,
args.TabletStorageInfo,
args.LargeDeletionMarkers,
args.OrphanNodeIds,
config);
UpdateLogTag();

Expand Down
11 changes: 10 additions & 1 deletion cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,13 +322,22 @@ void TIndexTabletActor::ExecuteTx_RenameNode(
}

// remove target ref and unlink target node
UnlinkNode(
auto e = UnlinkNode(
db,
args.NewParentNode->NodeId,
args.NewName,
*args.NewChildNode,
args.NewChildRef->MinCommitId,
args.CommitId);

if (HasError(e)) {
const auto nodeId = args.NewChildNode->NodeId;
WriteOrphanNode(db, TStringBuilder()
<< "RenameNode: " << args.SessionId
<< ", ParentNodeId: " << args.NewParentNode->NodeId
<< ", NodeId: " << nodeId
<< ", Error: " << FormatError(e), nodeId);
}
} else {
// remove target ref
UnlinkExternalNode(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,18 @@ void TIndexTabletActor::ExecuteTx_ResetSession(
nodeId,
it->Attrs.GetSize());

RemoveNode(
auto e = RemoveNode(
db,
*it,
it->MinCommitId,
commitId);

if (HasError(e)) {
WriteOrphanNode(db, TStringBuilder()
<< "ResetSession: " << args.SessionId
<< ", RemoveNode: " << nodeId
<< ", Error: " << FormatError(e), nodeId);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,18 @@ void TIndexTabletActor::ExecuteTx_SetNodeAttr(
attrs.SetCTime(update.GetCTime());
}
if (HasFlag(flags, NProto::TSetNodeAttrRequest::F_SET_ATTR_SIZE)) {
Truncate(
auto e = Truncate(
db,
args.NodeId,
args.CommitId,
attrs.GetSize(),
update.GetSize());

if (HasError(e)) {
args.Error = e;
return;
}

attrs.SetSize(update.GetSize());
}

Expand Down
12 changes: 9 additions & 3 deletions cloud/filestore/libs/storage/tablet/tablet_actor_truncate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,17 @@ void TIndexTabletActor::ExecuteTx_TruncateRange(
return RebootTabletOnCommitOverflow(ctx, "TruncateRange");
}

auto e = TruncateRange(db, args.NodeId, commitId, args.Range);
if (HasError(e)) {
args.Error = std::move(e);
return;
}

AddRange(
args.NodeId,
args.Range.Offset,
args.Range.Length,
args.ProfileLogRequest);

TruncateRange(db, args.NodeId, commitId, args.Range);
}

void TIndexTabletActor::CompleteTx_TruncateRange(
Expand All @@ -343,7 +347,9 @@ void TIndexTabletActor::CompleteTx_TruncateRange(
args.NodeId,
args.Range.Describe().c_str());

auto response = std::make_unique<TEvIndexTabletPrivate::TEvTruncateRangeResponse>();
auto response =
std::make_unique<TEvIndexTabletPrivate::TEvTruncateRangeResponse>(
std::move(args.Error));
NCloud::Reply(ctx, *args.RequestInfo, std::move(response));

EnqueueCollectGarbageIfNeeded(ctx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,13 +383,18 @@ void TIndexTabletActor::ExecuteTx_UnlinkNode(

db.WriteOpLogEntry(args.OpLogEntry);
} else {
UnlinkNode(
auto e = UnlinkNode(
db,
args.ParentNodeId,
args.Name,
*args.ChildNode,
args.ChildRef->MinCommitId,
args.CommitId);

if (HasError(e)) {
args.Error = std::move(e);
return;
}
}

auto* session = FindSession(args.SessionId);
Expand Down
11 changes: 7 additions & 4 deletions cloud/filestore/libs/storage/tablet/tablet_actor_zerorange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void TIndexTabletActor::ExecuteTx_ZeroRange(
args.Range.Length,
args.ProfileLogRequest);

ZeroRange(db, args.NodeId, commitId, args.Range);
args.Error = ZeroRange(db, args.NodeId, commitId, args.Range);
}

void TIndexTabletActor::CompleteTx_ZeroRange(
Expand All @@ -100,12 +100,15 @@ void TIndexTabletActor::CompleteTx_ZeroRange(
ProfileLog);

LOG_DEBUG(ctx, TFileStoreComponents::TABLET,
"%s ZeroRange %lu %s completed",
"%s ZeroRange %lu %s completed: %s",
LogTag.c_str(),
args.NodeId,
args.Range.Describe().c_str());
args.Range.Describe().c_str(),
FormatError(args.Error).Quote().c_str());

auto response = std::make_unique<TEvIndexTabletPrivate::TEvZeroRangeResponse>();
auto response =
std::make_unique<TEvIndexTabletPrivate::TEvZeroRangeResponse>(
std::move(args.Error));
NCloud::Reply(ctx, *args.RequestInfo, std::move(response));

EnqueueCollectGarbageIfNeeded(ctx);
Expand Down
Loading

0 comments on commit f5e8ffb

Please sign in to comment.