From 5fb18547495d28e20889cefb74ece56551105375 Mon Sep 17 00:00:00 2001 From: Evgeny Budilovsky Date: Tue, 23 Jan 2024 12:07:38 +0200 Subject: [PATCH] NBS-179: support using multiple devices from the same rdma endpoint (#209) This commit fixes the problem with connecting to multiple devices on the same host using rdma compound storage. Before the fix same storage was reused to access all devices which resulted in data corruption --- .../libs/service_local/storage_rdma.cpp | 5 +- .../libs/service_local/storage_rdma_ut.cpp | 121 ++++++++++++++++++ .../blockstore/libs/service_local/ut/ya.make | 2 + 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 cloud/blockstore/libs/service_local/storage_rdma_ut.cpp diff --git a/cloud/blockstore/libs/service_local/storage_rdma.cpp b/cloud/blockstore/libs/service_local/storage_rdma.cpp index 579a6c10b86..f441ed71f92 100644 --- a/cloud/blockstore/libs/service_local/storage_rdma.cpp +++ b/cloud/blockstore/libs/service_local/storage_rdma.cpp @@ -471,12 +471,14 @@ class TRdmaStorageProvider final private: struct TEndpoint { + TString Uuid; TString Host; ui32 Port; bool operator<(const TEndpoint& other) const { - return std::tie(Host, Port) < std::tie(other.Host, other.Port); + return std::tie(Uuid, Host, Port) < + std::tie(other.Uuid, other.Host, other.Port); } }; @@ -526,6 +528,7 @@ class TRdmaStorageProvider final for (const auto& device: volume.GetDevices()) { auto ep = TEndpoint{ + device.GetDeviceUUID(), device.GetRdmaEndpoint().GetHost(), device.GetRdmaEndpoint().GetPort()}; diff --git a/cloud/blockstore/libs/service_local/storage_rdma_ut.cpp b/cloud/blockstore/libs/service_local/storage_rdma_ut.cpp new file mode 100644 index 00000000000..ae30513596a --- /dev/null +++ b/cloud/blockstore/libs/service_local/storage_rdma_ut.cpp @@ -0,0 +1,121 @@ +#include "storage_rdma.h" + +#include +#include +#include + +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace NThreading; + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +#define UNIT_ASSERT_SUCCEEDED(e) \ + UNIT_ASSERT_C(SUCCEEDED(e.GetCode()), e.GetMessage()) + +class TDummyClientEndpoint: public NRdma::IClientEndpoint +{ + TResultOrError AllocateRequest( + NRdma::IClientHandlerPtr handler, + std::unique_ptr context, + size_t requestBytes, + size_t responseBytes) override + { + Y_UNUSED(handler); + Y_UNUSED(context); + Y_UNUSED(requestBytes); + Y_UNUSED(responseBytes); + + return MakeError(E_NOT_IMPLEMENTED); + } + + void SendRequest(NRdma::TClientRequestPtr req, TCallContextPtr callContext) + override + { + Y_UNUSED(req); + Y_UNUSED(callContext); + } +}; + +class TRdmaClientHelper: public NRdma::IClient +{ +public: + TVector> StartedEndpoints; + +public: + TFuture StartEndpoint( + TString host, + ui32 port) noexcept override + { + StartedEndpoints.emplace_back(host, port); + + return MakeFuture( + std::make_shared()); + } + + void Start() override + {} + + void Stop() override + {} +}; + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TRdmaStorageTest) +{ + Y_UNIT_TEST(ShouldCreateEndpointForEachDeviceOnSameHost) + { + auto client = std::make_shared(); + auto provider = CreateRdmaStorageProvider( + NCloud::NBlockStore::CreateServerStatsStub(), + client, + ERdmaTaskQueueOpt::DontUse); + + NProto::TVolume volume; + volume.SetDiskId("disk0"); + volume.SetBlockSize(4096); + volume.SetBlocksCount(1000000); + volume.SetStorageMediaKind(NProto::STORAGE_MEDIA_SSD_NONREPLICATED); + + TString hostName = "host1"; + ui32 hostPort = 1111; + for (auto i = 0; i < 2; i++) { + auto* device = volume.AddDevices(); + device->SetDeviceName("dev" + std::to_string(i)); + device->SetDeviceUUID("uuid" + std::to_string(i)); + device->SetAgentId(hostName); + device->SetBlockCount(4'000); + device->SetPhysicalOffset(32'000); + auto* rdma = device->MutableRdmaEndpoint(); + rdma->SetHost(hostName); + rdma->SetPort(hostPort); + } + + auto future = provider->CreateStorage( + volume, + "", + NProto::VOLUME_ACCESS_READ_WRITE); + auto storage = future.GetValue(); + + UNIT_ASSERT_VALUES_EQUAL(2, client->StartedEndpoints.size()); + + for (auto i = 0; i < 2; i++) { + UNIT_ASSERT_VALUES_EQUAL( + hostName, + std::get<0>(client->StartedEndpoints[i])); + UNIT_ASSERT_VALUES_EQUAL( + hostPort, + std::get<1>(client->StartedEndpoints[i])); + } + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/service_local/ut/ya.make b/cloud/blockstore/libs/service_local/ut/ya.make index 8d18839e536..ab898928586 100644 --- a/cloud/blockstore/libs/service_local/ut/ya.make +++ b/cloud/blockstore/libs/service_local/ut/ya.make @@ -7,12 +7,14 @@ TIMEOUT(180) SRCS( compound_storage_ut.cpp storage_aio_ut.cpp + storage_rdma_ut.cpp storage_null_ut.cpp storage_spdk_ut.cpp ) PEERDIR( cloud/blockstore/libs/server + cloud/blockstore/libs/rdma/iface cloud/storage/core/libs/aio )