Skip to content

Commit

Permalink
NEBDUTY-965: avoid data race when reading WaitMode from rdma client c…
Browse files Browse the repository at this point in the history
…onfig

Rdma client config can be modified during initial hand shake that's why CM
thread overwrites it by original config when Qp created in
`TClientEndpoint::CreateQP`

thread sanitizer reports errors since this Config.WaitMode is used in
completion poller thread. To avoid this error WaitMode is read only once during
the initialization and of the endpoint and we don't modify it any more

Thread sanitizer error looks like that
```
WARNING: ThreadSanitizer: data race (pid=2354960)
  Write of size 8 at 0x7b5c00000118 by thread T2:
    #0 __tsan_memcpy contrib/libs/clang16-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:3165:3 (cloud-blockstore-libs-rdma-impl-ut+0x22f79f4) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #1 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint::CreateQP() cloud/blockstore/libs/rdma/impl/client.cpp:591:16 (cloud-blockstore-libs-rdma-impl-ut+0x49e974a) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #2 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::BeginConnect(NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint*) cloud/blockstore/libs/rdma/impl/client.cpp:1876:19 (cloud-blockstore-libs-rdma-impl-ut+0x49e85dc) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #3 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::HandleConnectionEvent(std::__y1::unique_ptr<rdma_cm_event, int (*)(rdma_cm_event*)>) cloud/blockstore/libs/rdma/impl/client.cpp:1736:13 (cloud-blockstore-libs-rdma-impl-ut+0x49bbfb0) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #4 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::HandleConnectionEvent(std::__y1::unique_ptr<rdma_cm_event, int (*)(rdma_cm_event*)>) cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49bc761) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #5 NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::HandleConnectionEvents() cloud/blockstore/libs/rdma/impl/client.cpp:1282:27 (cloud-blockstore-libs-rdma-impl-ut+0x49d3dca) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #6 NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp:1254:25 (cloud-blockstore-libs-rdma-impl-ut+0x49d3870) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #7 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49d39d9) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #8 void* (anonymous namespace)::ThreadProcWrapper<ISimpleThread>(void*) util/system/thread.cpp:383:45 (cloud-blockstore-libs-rdma-impl-ut+0x2680796) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #9 (anonymous namespace)::TPosixThread::ThreadProxy(void*) util/system/thread.cpp:244:20 (cloud-blockstore-libs-rdma-impl-ut+0x268b82e) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
  Previous read of size 4 at 0x7b5c00000118 by thread T1:
    #0 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint::AbortRequests() cloud/blockstore/libs/rdma/impl/client.cpp:814:16 (cloud-blockstore-libs-rdma-impl-ut+0x49c4494) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #1 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::HandlePollEvent(epoll_event const&) cloud/blockstore/libs/rdma/impl/client.cpp:1428:31 (cloud-blockstore-libs-rdma-impl-ut+0x49c4042) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #2 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::HandlePollEvents() cloud/blockstore/libs/rdma/impl/client.cpp:1446:17 (cloud-blockstore-libs-rdma-impl-ut+0x49c3736) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #3 void NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::Execute<(NCloud::NBlockStore::NRdma::EWaitMode)0>() cloud/blockstore/libs/rdma/impl/client.cpp:1530:21 (cloud-blockstore-libs-rdma-impl-ut+0x49c3341) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #4 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp:1398:17 (cloud-blockstore-libs-rdma-impl-ut+0x49c1c18) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #5 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49c1cf9) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #6 void* (anonymous namespace)::ThreadProcWrapper<ISimpleThread>(void*) util/system/thread.cpp:383:45 (cloud-blockstore-libs-rdma-impl-ut+0x2680796) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #7 (anonymous namespace)::TPosixThread::ThreadProxy(void*) util/system/thread.cpp:244:20 (cloud-blockstore-libs-rdma-impl-ut+0x268b82e) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
```
  • Loading branch information
budevg committed Apr 3, 2024
1 parent 764cd54 commit e412341
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions cloud/blockstore/libs/rdma/impl/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ class TClientEndpoint final
// config might be adjusted during initial handshake
TClientConfigPtr OriginalConfig;
TClientConfig Config;
const EWaitMode WaitMode;
bool ResetConfig = false;

TCompletionPoller* Poller = nullptr;
Expand Down Expand Up @@ -524,6 +525,7 @@ TClientEndpoint::TClientEndpoint(
, Reconnect(config->MaxReconnectDelay)
, OriginalConfig(std::move(config))
, Config(*OriginalConfig)
, WaitMode(Config.WaitMode)
{
// user data attached to connection events
Connection->context = this;
Expand Down Expand Up @@ -769,14 +771,14 @@ void TClientEndpoint::SendRequest(
Counters->RequestEnqueued();
InputRequests.Enqueue(std::move(req));

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
RequestEvent.Set();
}
}

bool TClientEndpoint::HandleInputRequests()
{
if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
RequestEvent.Clear();
}

Expand Down Expand Up @@ -811,7 +813,7 @@ bool TClientEndpoint::AbortRequests() noexcept
{
bool ret = false;

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
DisconnectEvent.Clear();
}

Expand Down Expand Up @@ -856,7 +858,7 @@ bool TClientEndpoint::HandleCompletionEvents()
{
ibv_cq* cq = CompletionQueue.get();

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
Verbs->GetCompletionEvent(cq);
Verbs->AckCompletionEvents(cq, 1);
Verbs->RequestCompletionEvent(cq, 0);
Expand Down Expand Up @@ -1113,7 +1115,7 @@ void TClientEndpoint::SetError() noexcept
RDMA_ERROR("flush error: " << e.what());
}

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
DisconnectEvent.Set();
}
}
Expand Down

0 comments on commit e412341

Please sign in to comment.