Skip to content

Commit

Permalink
issue-2033: additional check for CreateHandle DupCache entries to avo…
Browse files Browse the repository at this point in the history
…id replying with old responses to new requests with reused request ids (which may happen due to bugs in the client) (#2034)

* issue-2033: additional check for CreateHandle DupCache entries to avoid replying with old responses to new requests with reused request ids (which may happen due to bugs in the client)

* issue-2033: better DupCacheEntryId generation
  • Loading branch information
qkrorlqr authored Sep 15, 2024
1 parent ffbc7bc commit fd37214
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 17 deletions.
1 change: 1 addition & 0 deletions cloud/filestore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace NCloud::NFileStore{
xxx(NodeNotFoundInFollower) \
xxx(NotEnoughResultsInGetNodeAttrBatchResponses) \
xxx(AsyncDestroyHandleFailed) \
xxx(DuplicateRequestId) \
// FILESTORE_CRITICAL_EVENTS

#define FILESTORE_IMPOSSIBLE_EVENTS(xxx) \
Expand Down
35 changes: 26 additions & 9 deletions cloud/filestore/libs/storage/tablet/session.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ using TSessionLockMultiMap = THashMultiMap<ui64, TSessionLock*>;

struct TDupCacheEntry: NProto::TDupCacheEntry
{
bool Commited = false;
bool Committed = false;
bool Dropped = false;

TDupCacheEntry(const NProto::TDupCacheEntry& proto, bool commited)
TDupCacheEntry(const NProto::TDupCacheEntry& proto, bool committed)
: NProto::TDupCacheEntry(proto)
, Commited(commited)
, Committed(committed)
{}
};

Expand Down Expand Up @@ -112,7 +113,7 @@ struct TSession
return SubSessions.IsValid();
}

bool HasSeqNo(ui64 seqNo)
bool HasSeqNo(ui64 seqNo) const
{
return SubSessions.HasSeqNo(seqNo);
}
Expand Down Expand Up @@ -154,7 +155,7 @@ struct TSession

ui64 GenerateDupCacheEntryId()
{
return LastDupCacheEntryId++;
return ++LastDupCacheEntryId;
}

void LoadDupCacheEntry(NProto::TDupCacheEntry entry)
Expand All @@ -177,6 +178,21 @@ struct TSession
return nullptr;
}

void DropDupEntry(ui64 requestId)
{
if (!requestId) {
return;
}

auto it = DupCache.find(requestId);
if (it == DupCache.end()) {
return;
}

it->second->Dropped = true;
DupCache.erase(it);
}

TDupCacheEntry* AccessDupEntry(ui64 requestId)
{
if (!requestId) {
Expand All @@ -191,12 +207,12 @@ struct TSession
return nullptr;
}

void AddDupCacheEntry(NProto::TDupCacheEntry proto, bool commited)
void AddDupCacheEntry(NProto::TDupCacheEntry proto, bool committed)
{
Y_ABORT_UNLESS(proto.GetRequestId());
Y_ABORT_UNLESS(proto.GetEntryId());

DupCacheEntries.emplace_back(std::move(proto), commited);
DupCacheEntries.emplace_back(std::move(proto), committed);

auto& entry = DupCacheEntries.back();
auto [_, inserted] = DupCache.emplace(entry.GetRequestId(), &entry);
Expand All @@ -206,7 +222,7 @@ struct TSession
void CommitDupCacheEntry(ui64 requestId)
{
if (auto it = DupCache.find(requestId); it != DupCache.end()) {
it->second->Commited = true;
it->second->Committed = true;
}
}

Expand All @@ -217,7 +233,8 @@ struct TSession
}

auto entry = DupCacheEntries.front();
DupCache.erase(entry.GetRequestId());
const auto erased = DupCache.erase(entry.GetRequestId());
Y_DEBUG_ABORT_UNLESS(erased || entry.Dropped);
DupCacheEntries.pop_front();

return entry.GetEntryId();
Expand Down
17 changes: 15 additions & 2 deletions cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,23 @@ void TIndexTabletActor::HandleCreateHandle(
}

auto* msg = ev->Get();
if (const auto* e = session->LookupDupEntry(GetRequestId(msg->Record))) {
const auto requestId = GetRequestId(msg->Record);
if (const auto* e = session->LookupDupEntry(requestId)) {
auto response = std::make_unique<TResponse>();
GetDupCacheEntry(e, response->Record);
return NCloud::Reply(ctx, *ev, std::move(response));
// sometimes bugged clients send us duplicate request ids
// see #2033
if (msg->Record.GetName().Empty() && msg->Record.GetNodeId()
!= response->Record.GetNodeAttr().GetId())
{
ReportDuplicateRequestId(TStringBuilder() << "CreateHandle response"
<< " for different NodeId found for RequestId=" << requestId
<< ": " << response->Record.GetNodeAttr().GetId()
<< " != " << msg->Record.GetNodeId());
session->DropDupEntry(requestId);
} else {
return NCloud::Reply(ctx, *ev, std::move(response));
}
}

if (msg->Record.GetFollowerFileSystemId()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ void TIndexTabletActor::HandleCreateNode(
GetDupCacheEntry(e, response->Record);
if (response->Record.GetNode().GetId() == 0) {
// it's an external node which is not yet created in follower
// this check is needed for the case of leader reboot
*response->Record.MutableError() = MakeError(
E_REJECTED,
"node not yet created in follower");
Expand Down
5 changes: 3 additions & 2 deletions cloud/filestore/libs/storage/tablet/tablet_state_sessions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,14 +844,15 @@ void TIndexTabletState::GetDupCacheEntry( \
const TDupCacheEntry* entry, \
NProto::T##name##Response& response) \
{ \
if (entry->Commited && entry->Has##name()) { \
if (entry->Committed && entry->Has##name()) { \
response = entry->Get##name(); \
} else if (!entry->Commited) { \
} else if (!entry->Committed) { \
*response.MutableError() = ErrorDuplicate(); \
} else if (!entry->Has##name()) { \
*response.MutableError() = MakeError( \
E_ARGUMENT, TStringBuilder() << "invalid request dup cache type: " \
<< entry->ShortUtf8DebugString().Quote()); \
ReportDuplicateRequestId(response.GetError().GetMessage()); \
} \
} \
// FILESTORE_IMPLEMENT_DUPCACHE
Expand Down
73 changes: 69 additions & 4 deletions cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,14 +1081,14 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes)
auto request = tablet.CreateCreateNodeRequest(TCreateNodeArgs::File(RootNodeId, "xxx"));
request->Record.MutableHeaders()->SetRequestId(reqId);

return std::move(request);
return request;
};

auto createOther = [&] (ui64 reqId) {
auto request = tablet.CreateUnlinkNodeRequest(RootNodeId, "xxx", false);
request->Record.MutableHeaders()->SetRequestId(reqId);

return std::move(request);
return request;
};

ui64 nodeId = InvalidNodeId;
Expand All @@ -1104,11 +1104,14 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes)
tablet.SendRequest(createOther(100500));
{
auto response = tablet.RecvUnlinkNodeResponse();
UNIT_ASSERT(HasError(response->Record.GetError()));
UNIT_ASSERT_VALUES_EQUAL_C(
E_ARGUMENT,
response->Record.GetError().GetCode(),
response->Record.GetError().GetMessage());
}
}

Y_UNIT_TEST(ShouldWaitForDupRequestToBeCommited)
Y_UNIT_TEST(ShouldWaitForDupRequestToBeCommitted)
{
TTestEnv env;
env.CreateSubDomain("nfs");
Expand Down Expand Up @@ -1601,6 +1604,68 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes)
UNIT_ASSERT_VALUES_EQUAL(0, stats.GetDeletionMarkersCount());
}
}

Y_UNIT_TEST(ShouldIdentifyRequestIdCollision)
{
TTestEnv env;
env.CreateSubDomain("nfs");

ui32 nodeIdx = env.CreateNode("nfs");
ui64 tabletId = env.BootIndexTablet(nodeIdx);

TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId);
tablet.InitSession("client", "session");

auto createRequest = [&] (ui64 reqId, ui64 nodeId) {
auto request = tablet.CreateCreateHandleRequest(
nodeId, TCreateHandleArgs::RDWR);
request->Record.MutableHeaders()->SetRequestId(reqId);

return request;
};

const TString name1 = "file1";
const TString name2 = "file2";

const auto nodeId1 = tablet.CreateNode(TCreateNodeArgs::File(
RootNodeId,
name1))->Record.GetNode().GetId();
const auto nodeId2 = tablet.CreateNode(TCreateNodeArgs::File(
RootNodeId,
name2))->Record.GetNode().GetId();

UNIT_ASSERT_VALUES_UNEQUAL(InvalidNodeId, nodeId1);
UNIT_ASSERT_VALUES_UNEQUAL(InvalidNodeId, nodeId2);
UNIT_ASSERT_VALUES_UNEQUAL(nodeId1, nodeId2);

const ui64 requestId = 100500;

tablet.SendRequest(createRequest(requestId, nodeId1));
{
auto response = tablet.RecvCreateHandleResponse();
UNIT_ASSERT_VALUES_EQUAL_C(
S_OK,
response->Record.GetError().GetCode(),
response->Record.GetError().GetMessage());

UNIT_ASSERT_VALUES_EQUAL(
nodeId1,
response->Record.GetNodeAttr().GetId());
}

tablet.SendRequest(createRequest(requestId, nodeId2));
{
auto response = tablet.RecvCreateHandleResponse();
UNIT_ASSERT_VALUES_EQUAL_C(
S_OK,
response->Record.GetError().GetCode(),
response->Record.GetError().GetMessage());

UNIT_ASSERT_VALUES_EQUAL(
nodeId2,
response->Record.GetNodeAttr().GetId());
}
}
}

} // namespace NCloud::NFileStore::NStorage

0 comments on commit fd37214

Please sign in to comment.