Skip to content

Commit

Permalink
merge to stable-23-3: filestore LargeDeletionMarkers fixes, multitabl…
Browse files Browse the repository at this point in the history
…et tx fixes, uts (#2220)

* issue-1146: enforce that RO tx can not read data not commited yet by RW tx (#2161)

* issue-1146: enforce that RO tx can not read stale data not commited yet by RW tx

* add `DataObservedByROTxUponCompletionShouldNeverBeStale` test

* issue-1795: fixed TBlockDeletion application in FlushBytes - we should reset only the part of the block that describes the data, not the NodeId, BlockIndex, etc part (#2206)

* issue-2033: not returning stale handles from DupCache for CreateHandle requests (#2191)

* issue-2033: not returning stale handles from DupCache for CreateHandle requests

* issue-2033: stale handle check should only be done in shards

* issue-2033: stale handle check should only be done in shards - fix: in shards & in single-tablet filestores

* issue-1795: fixed usage of uninitialized memory in ut (#2209)

* issue-2207: followers shouldn't check session and use DupCache upon CreateNode/UnlinkNode requests (#2218)

* issue-2207: followers shouldn't check session and use DupCache upon CreateNode/UnlinkNode requests

* issue-2207: reporting crit event upon receiving non-retriable error for a CreateNode/UnlinkNode operation from follower

---------

Co-authored-by: Maxim Deb Natkh <[email protected]>
  • Loading branch information
qkrorlqr and debnatkh authored Oct 4, 2024
1 parent e532490 commit b9351c7
Show file tree
Hide file tree
Showing 12 changed files with 596 additions and 69 deletions.
1 change: 1 addition & 0 deletions cloud/filestore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ namespace NCloud::NFileStore{
xxx(DuplicateRequestId) \
xxx(InvalidDupCacheEntry) \
xxx(GeneratedOrphanNode) \
xxx(ReceivedNodeOpErrorFromFollower) \
// FILESTORE_CRITICAL_EVENTS

#define FILESTORE_IMPOSSIBLE_EVENTS(xxx) \
Expand Down
2 changes: 1 addition & 1 deletion cloud/filestore/libs/storage/service/service_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3751,7 +3751,7 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest)

auto headers = service.InitSession(fsId, "client");

auto error = MakeError(E_FS_INVALID_SESSION, "bad session");
auto error = MakeError(E_FAIL);

env.GetRuntime().SetEventFilter(
[&] (TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& event) {
Expand Down
7 changes: 7 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1066,4 +1066,11 @@ i64 TIndexTabletActor::TMetrics::CalculateNetworkRequestBytes(
return delta;
}

////////////////////////////////////////////////////////////////////////////////

bool TIndexTabletActor::IsShard() const
{
return GetFileSystem().GetShardNo() > 0;
}

} // namespace NCloud::NFileStore::NStorage
2 changes: 2 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ class TIndexTabletActor final
const NActors::TActorContext& ctx,
const TVector<NProto::TOpLogEntry>& opLog);

bool IsShard() const;

private:
template <typename TMethod>
TSession* AcceptRequest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,41 @@ void TIndexTabletActor::HandleCreateHandle(
auto* msg = ev->Get();
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
const bool shouldStoreHandles =
GetFileSystem().GetFollowerFileSystemIds().empty();
auto response = std::make_unique<TResponse>();
// sometimes bugged clients send us duplicate request ids

// sometimes we may receive duplicate request ids - either due to
// client-side bugs or due to our own bugs (e.g. not doing ResetSession
// upon unmount) or ungraceful client reboots (without actual unmounts)
// see #2033
if (!GetDupCacheEntry(e, response->Record)) {
// invalid entry type - it's certainly a request id collision
session->DropDupEntry(requestId);
} else if (msg->Record.GetName().Empty() && msg->Record.GetNodeId()
!= response->Record.GetNodeAttr().GetId())
{
// this handle relates to a different node id => it's certainly a
// request id collision as well
ReportDuplicateRequestId(TStringBuilder() << "CreateHandle response"
<< " for different NodeId found for RequestId=" << requestId
<< ": " << response->Record.GetNodeAttr().GetId()
<< " != " << msg->Record.GetNodeId());
session->DropDupEntry(requestId);
} else if (shouldStoreHandles
&& !HasError(response->Record.GetError())
&& !FindHandle(response->Record.GetHandle()))
{
// there is no open handle associated with this CreateHandleResponse
// even though it may be an actual retry attempt of some old request
// it's still safer to disregard this DupCache entry and try to
// rerun this request
ReportDuplicateRequestId(TStringBuilder() << "CreateHandle response"
<< " with stale handle found for RequestId=" << requestId
<< ": ResponseNodeId=" << response->Record.GetNodeAttr().GetId()
<< " NodeId=" << msg->Record.GetNodeId()
<< " HandleId=" << response->Record.GetHandle());
session->DropDupEntry(requestId);
} else {
return NCloud::Reply(ctx, *ev, std::move(response));
}
Expand Down
85 changes: 52 additions & 33 deletions cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,22 @@ void TCreateNodeInFollowerActor::HandleCreateNodeResponse(
return;
}

LOG_ERROR(
ctx,
TFileStoreComponents::TABLET_WORKER,
"%s Follower node creation failed for %s, %s with error %s"
const auto message = Sprintf(
"Follower node creation failed for %s, %s with error %s"
", will not retry",
LogTag.c_str(),
Request.GetFileSystemId().c_str(),
Request.GetName().c_str(),
FormatError(msg->GetError()).Quote().c_str());

LOG_ERROR(
ctx,
TFileStoreComponents::TABLET_WORKER,
"%s %s",
LogTag.c_str(),
message.c_str());

ReportReceivedNodeOpErrorFromFollower(message);

ReplyAndDie(ctx, msg->GetError());
return;
}
Expand Down Expand Up @@ -283,29 +289,37 @@ void TIndexTabletActor::HandleCreateNode(
const TEvService::TEvCreateNodeRequest::TPtr& ev,
const TActorContext& ctx)
{
auto* session =
AcceptRequest<TEvService::TCreateNodeMethod>(ev, ctx, ValidateRequest);
if (!session) {
return;
}

auto* msg = ev->Get();
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
auto response = std::make_unique<TEvService::TEvCreateNodeResponse>();
if (GetDupCacheEntry(e, response->Record)) {
if (response->Record.GetNode().GetId() == 0) {
// it's an external node which is not yet created in follower
// this check is needed for the case of leader reboot
*response->Record.MutableError() = MakeError(
E_REJECTED,
"node not yet created in follower");
}

return NCloud::Reply(ctx, *ev, std::move(response));
// DupCache isn't needed for Create/UnlinkNode requests in shards
if (!IsShard()) {
auto* session = AcceptRequest<TEvService::TCreateNodeMethod>(
ev,
ctx,
ValidateRequest);
if (!session) {
return;
}

session->DropDupEntry(requestId);
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
auto response =
std::make_unique<TEvService::TEvCreateNodeResponse>();
if (GetDupCacheEntry(e, response->Record)) {
if (response->Record.GetNode().GetId() == 0) {
// it's an external node which is not yet created in
// follower
// this check is needed for the case of leader reboot
*response->Record.MutableError() = MakeError(
E_REJECTED,
"node not yet created in follower");
}

return NCloud::Reply(ctx, *ev, std::move(response));
}

session->DropDupEntry(requestId);
}
}

ui64 parentNodeId = msg->Record.GetNodeId();
Expand Down Expand Up @@ -342,7 +356,9 @@ bool TIndexTabletActor::PrepareTx_CreateNode(
{
Y_UNUSED(ctx);

FILESTORE_VALIDATE_DUPTX_SESSION(CreateNode, args);
if (!IsShard()) {
FILESTORE_VALIDATE_DUPTX_SESSION(CreateNode, args);
}

TIndexTabletDatabaseProxy db(tx.DB, args.NodeUpdates);

Expand Down Expand Up @@ -433,12 +449,15 @@ void TIndexTabletActor::ExecuteTx_CreateNode(
{
FILESTORE_VALIDATE_TX_ERROR(CreateNode, args);

auto* session = FindSession(args.SessionId);
if (!session) {
auto message = ReportSessionNotFoundInTx(TStringBuilder()
<< "CreateNode: " << args.Request.ShortDebugString());
args.Error = MakeError(E_INVALID_STATE, std::move(message));
return;
TSession* session = nullptr;
if (!IsShard()) {
session = FindSession(args.SessionId);
if (!session) {
auto message = ReportSessionNotFoundInTx(TStringBuilder()
<< "CreateNode: " << args.Request.ShortDebugString());
args.Error = MakeError(E_INVALID_STATE, std::move(message));
return;
}
}

TIndexTabletDatabaseProxy db(tx.DB, args.NodeUpdates);
Expand Down Expand Up @@ -533,7 +552,7 @@ void TIndexTabletActor::ExecuteTx_CreateNode(
// 1. there will be no duplicates - node name is generated by the leader
// 2. the leader serves all file creation operations and has its own
// dupcache
if (!GetFileSystem().GetShardNo()) {
if (!IsShard()) {
AddDupCacheEntry(
db,
session,
Expand Down Expand Up @@ -574,7 +593,7 @@ void TIndexTabletActor::CompleteTx_CreateNode(
// 1. there will be no duplicates - node name is generated by the leader
// 2. the leader serves all file creation operations and has its own
// dupcache
if (!GetFileSystem().GetShardNo()) {
if (!IsShard()) {
CommitDupCacheEntry(args.SessionId, args.RequestId);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class TReadBlockVisitor final
TABLET_VERIFY(!ApplyingByteLayer);

if (BlockMinCommitId < deletion.CommitId) {
Block = {};
Block.Block = {};
}
}

Expand All @@ -90,6 +90,11 @@ class TReadBlockVisitor final
const auto bytesOffset =
static_cast<ui64>(Block.BlockIndex) * BlockSize;
Block.BytesMinCommitId = bytes.MinCommitId;
TABLET_VERIFY_C(
bytes.Offset - bytesOffset <= BlockSize,
"Bytes: " << bytes.Describe()
<< ", Block: " << Block.BlockIndex
<< ", BlockSize: " << BlockSize);
Block.BlockBytes.Intervals.push_back({
IntegerCast<ui32>(bytes.Offset - bytesOffset),
TString(data)
Expand Down
78 changes: 47 additions & 31 deletions cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,22 @@ void TUnlinkNodeInFollowerActor::HandleUnlinkNodeResponse(
return;
}

LOG_ERROR(
ctx,
TFileStoreComponents::TABLET_WORKER,
"%s Follower node unlinking failed for %s, %s with error %s"
const auto message = Sprintf(
"Follower node unlinking failed for %s, %s with error %s"
", will not retry",
LogTag.c_str(),
Request.GetFileSystemId().c_str(),
Request.GetName().c_str(),
FormatError(msg->GetError()).Quote().c_str());

LOG_ERROR(
ctx,
TFileStoreComponents::TABLET_WORKER,
"%s %s",
LogTag.c_str(),
message.c_str());

ReportReceivedNodeOpErrorFromFollower(message);

ReplyAndDie(ctx, msg->GetError());
return;
}
Expand Down Expand Up @@ -239,20 +245,24 @@ void TIndexTabletActor::HandleUnlinkNode(
return;
}

auto* session = AcceptRequest<TEvService::TUnlinkNodeMethod>(ev, ctx, ValidateRequest);
if (!session) {
return;
}

auto* msg = ev->Get();
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
auto response = std::make_unique<TEvService::TEvUnlinkNodeResponse>();
if (GetDupCacheEntry(e, response->Record)) {
return NCloud::Reply(ctx, *ev, std::move(response));

// DupCache isn't needed for Create/UnlinkNode requests in shards
if (!IsShard()) {
auto* session = AcceptRequest<TEvService::TUnlinkNodeMethod>(ev, ctx, ValidateRequest);
if (!session) {
return;
}

session->DropDupEntry(requestId);
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
auto response = std::make_unique<TEvService::TEvUnlinkNodeResponse>();
if (GetDupCacheEntry(e, response->Record)) {
return NCloud::Reply(ctx, *ev, std::move(response));
}

session->DropDupEntry(requestId);
}
}

auto requestInfo = CreateRequestInfo(
Expand All @@ -277,7 +287,9 @@ bool TIndexTabletActor::PrepareTx_UnlinkNode(
{
Y_UNUSED(ctx);

FILESTORE_VALIDATE_DUPTX_SESSION(UnlinkNode, args);
if (!IsShard()) {
FILESTORE_VALIDATE_DUPTX_SESSION(UnlinkNode, args);
}

TIndexTabletDatabaseProxy db(tx.DB, args.NodeUpdates);

Expand Down Expand Up @@ -397,20 +409,22 @@ void TIndexTabletActor::ExecuteTx_UnlinkNode(
}
}

auto* session = FindSession(args.SessionId);
if (!session) {
auto message = ReportSessionNotFoundInTx(TStringBuilder()
<< "UnlinkNode: " << args.Request.ShortDebugString());
args.Error = MakeError(E_INVALID_STATE, std::move(message));
return;
}
if (!IsShard()) {
auto* session = FindSession(args.SessionId);
if (!session) {
auto message = ReportSessionNotFoundInTx(TStringBuilder()
<< "UnlinkNode: " << args.Request.ShortDebugString());
args.Error = MakeError(E_INVALID_STATE, std::move(message));
return;
}

AddDupCacheEntry(
db,
session,
args.RequestId,
NProto::TUnlinkNodeResponse{},
Config->GetDupCacheEntryCount());
AddDupCacheEntry(
db,
session,
args.RequestId,
NProto::TUnlinkNodeResponse{},
Config->GetDupCacheEntryCount());
}

EnqueueTruncateIfNeeded(ctx);
}
Expand Down Expand Up @@ -450,7 +464,9 @@ void TIndexTabletActor::CompleteTx_UnlinkNode(
return;
}

CommitDupCacheEntry(args.SessionId, args.RequestId);
if (!IsShard()) {
CommitDupCacheEntry(args.SessionId, args.RequestId);
}

// TODO(#1350): support session events for external nodes
NProto::TSessionEvent sessionEvent;
Expand Down
Loading

0 comments on commit b9351c7

Please sign in to comment.