Skip to content

Commit

Permalink
issue-2421: safer code + extra ut for use-intermediate-write-buffer v…
Browse files Browse the repository at this point in the history
…olume tag (#2452)

* issue-2421: nonrepl disk ut with use-intermediate-write-buffer volume tag

* issue-2421: nonrepl disk ut with use-intermediate-write-buffer volume tag - making the intentions a bit more clear

* issue-2421: made local->remote request conversion in partition_nonrepl safer, added some other safety checks and comments
  • Loading branch information
qkrorlqr committed Nov 11, 2024
1 parent 4f1e06b commit ec1f4e3
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 19 deletions.
1 change: 1 addition & 0 deletions cloud/blockstore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ namespace NCloud::NBlockStore {
xxx(DiskRegistryInsertToPendingCleanupFailed) \
xxx(OverlappingRangesDuringMigrationDetected) \
xxx(StartExternalEndpointError) \
xxx(EmptyRequestSgList) \
// BLOCKSTORE_IMPOSSIBLE_EVENTS

////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,14 +454,35 @@ void TNonreplicatedPartitionActor::HandleWriteBlocksLocal(
return;
}

if (guard.Get().empty()) {
// can happen only if there is a bug in the code of the layers above
// this one
ReportEmptyRequestSgList();
replyError(
ctx,
*requestInfo,
E_ARGUMENT,
"empty SgList in request");
return;
}

// convert local request to remote

// copying request data into a new TIOVector and moving it to msg->Record
// afterwards since msg->Record.Blocks can be holding current request data
// or parts of it
NProto::TIOVector blocks;
SgListCopy(
guard.Get(),
ResizeIOVector(
*msg->Record.MutableBlocks(),
blocks,
msg->Record.BlocksCount,
PartConfig->GetBlockSize()));
*msg->Record.MutableBlocks() = std::move(blocks);

// explicitly clearing request data (SgList) just in case anyone adds some
// code to TDiskAgentWriteActor that tries to use it
msg->Record.Sglist.SetSgList({});

const bool assignVolumeRequestId =
Config->GetAssignIdToWriteAndZeroRequestsEnabled() &&
Expand Down
23 changes: 18 additions & 5 deletions cloud/blockstore/libs/storage/volume/volume_actor_forward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ void RejectVolumeRequest(

////////////////////////////////////////////////////////////////////////////////

void CopyDataBuffer(TEvService::TEvWriteBlocksLocalRequest& request)
void CopySgListIntoRequestBuffers(
TEvService::TEvWriteBlocksLocalRequest& request)
{
auto& record = request.Record;
auto g = record.Sglist.Acquire();
Expand All @@ -87,7 +88,7 @@ void CopyDataBuffer(TEvService::TEvWriteBlocksLocalRequest& request)
}

template <typename T>
void CopyDataBuffer(T& t)
void CopySgListIntoRequestBuffers(T& t)
{
Y_UNUSED(t);
}
Expand Down Expand Up @@ -748,14 +749,26 @@ void TVolumeActor::ForwardRequest(
}

/*
* Processing overlapping writes. Overlapping writes should not be sent
* to the underlying (storage) layer.
* Support for copying request data from the user-supplied buffer to some
* buffers which we own to protect the layer below Volume from bugged
* guests which modify buffer contents before receiving write response.
*
* Impacts performance (due to extra copying) and is thus not switched on
* by default.
*
* See https://github.com/ydb-platform/nbs/issues/2421
*/
if constexpr (IsWriteMethod<TMethod>) {
if (State->GetUseIntermediateWriteBuffer()) {
CopyDataBuffer(*msg);
CopySgListIntoRequestBuffers(*msg);
}
}

/*
* Processing overlapping writes. Overlapping writes should not be sent
* to the underlying (storage) layer.
*/
if constexpr (IsWriteMethod<TMethod>) {
const auto range = BuildRequestBlockRange(
*msg,
State->GetBlockSize());
Expand Down
63 changes: 50 additions & 13 deletions cloud/blockstore/libs/storage/volume/volume_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9181,7 +9181,9 @@ Y_UNIT_TEST_SUITE(TVolumeTest)
}
}

TVector<TString> WriteToM3DiskWithInflightDataCorruptionAndReadResults(
TVector<TString> WriteToDiskWithInflightDataCorruptionAndReadResults(
NCloud::NProto::EStorageMediaKind mediaKind,
ui32 writeNumberToIntercept,
const TString& tags)
{
NProto::TStorageServiceConfig config;
Expand All @@ -9200,7 +9202,7 @@ Y_UNIT_TEST_SUITE(TVolumeTest)
0,
false,
1,
NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3,
mediaKind,
1024,
"vol0",
"cloud",
Expand All @@ -9219,15 +9221,13 @@ Y_UNIT_TEST_SUITE(TVolumeTest)

// intercepting write request to one of the replicas
TAutoPtr<IEventHandle> writeToReplica;
THashMap<TActorId, ui32> sender2WriteCount;
ui32 writeNo = 0;
auto obs = [&] (TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& event) {
if (event->GetTypeRewrite()
== TEvService::EvWriteBlocksLocalRequest)
{
auto& writeCount = sender2WriteCount[event->Sender];
++writeCount;
// intercepting write request to the 3rd replica
if (writeCount == 3) {
++writeNo;
if (writeNo == writeNumberToIntercept) {
writeToReplica = event.Release();
return true;
}
Expand Down Expand Up @@ -9288,27 +9288,64 @@ Y_UNIT_TEST_SUITE(TVolumeTest)
return results;
}

Y_UNIT_TEST(ShouldCopyWriteRequestDataBeforeWritingToStorageIfTagIsSet)
Y_UNIT_TEST(ShouldCopyWriteRequestDataBeforeWritingToStorageIfTagIsSetM3)
{
auto results = WriteToM3DiskWithInflightDataCorruptionAndReadResults(
auto results = WriteToDiskWithInflightDataCorruptionAndReadResults(
NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3,
// 1 - to volume
// 1 - to mirror actor
// 3 - to 3 replicas
1 + 1 + 3,
"use-intermediate-write-buffer");
const TString adata(4_KB, 'a');
const TString bdata(4_KB, 'b');
UNIT_ASSERT_VALUES_EQUAL(adata, results[0]);
UNIT_ASSERT_VALUES_EQUAL(adata, results[1]);
UNIT_ASSERT_VALUES_EQUAL(adata, results[2]);
}

Y_UNIT_TEST(ShouldHaveDifferentDataInReplicasUponInflightBufferCorruption)
Y_UNIT_TEST(ShouldHaveDifferentDataInReplicasUponInflightBufferCorruptionM3)
{
auto results =
WriteToM3DiskWithInflightDataCorruptionAndReadResults("");
auto results = WriteToDiskWithInflightDataCorruptionAndReadResults(
NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3,
// 1 - to volume
// 1 - to mirror actor
// 3 - to 3 replicas (nonrepl part actors)
1 + 1 + 3,
"");
const TString adata(4_KB, 'a');
const TString bdata(4_KB, 'b');
UNIT_ASSERT_VALUES_EQUAL(adata, results[0]);
UNIT_ASSERT_VALUES_EQUAL(adata, results[1]);
UNIT_ASSERT_VALUES_EQUAL(bdata, results[2]);
}

Y_UNIT_TEST(ShouldCopyWriteRequestDataBeforeWritingToStorageIfTagIsSetNonrepl)
{
auto results = WriteToDiskWithInflightDataCorruptionAndReadResults(
NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED,
// 1 - to volume
// 1 - to nonrepl part actor
1 + 1,
"use-intermediate-write-buffer");
const TString adata(4_KB, 'a');
UNIT_ASSERT_VALUES_EQUAL(adata, results[0]);
UNIT_ASSERT_VALUES_EQUAL(adata, results[1]);
UNIT_ASSERT_VALUES_EQUAL(adata, results[2]);
}

Y_UNIT_TEST(ShouldHaveChangedDataInStorageUponInflightBufferCorruptionNonrepl)
{
auto results = WriteToDiskWithInflightDataCorruptionAndReadResults(
NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED,
// 1 - to volume
// 1 - to nonrepl part actor
1 + 1,
"");
const TString bdata(4_KB, 'b');
UNIT_ASSERT_VALUES_EQUAL(bdata, results[0]);
UNIT_ASSERT_VALUES_EQUAL(bdata, results[1]);
UNIT_ASSERT_VALUES_EQUAL(bdata, results[2]);
}
}

} // namespace NCloud::NBlockStore::NStorage

0 comments on commit ec1f4e3

Please sign in to comment.