From 1afa9a67c1c3064179a304fda56001433f6529d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Svensson?= Date: Mon, 19 Aug 2024 17:13:30 +0200 Subject: [PATCH] No slot map updates during a cluster client disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Björn Svensson --- src/valkeycluster.c | 4 + tests/CMakeLists.txt | 4 + ...-disconnect-without-slotmap-update-test.sh | 104 ++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100755 tests/scripts/client-disconnect-without-slotmap-update-test.sh diff --git a/src/valkeycluster.c b/src/valkeycluster.c index da88b57..c809ed0 100644 --- a/src/valkeycluster.c +++ b/src/valkeycluster.c @@ -3175,6 +3175,10 @@ static int updateSlotMapAsync(valkeyClusterAsyncContext *acc, /* Don't allow concurrent slot map updates. */ return VALKEY_ERR; } + if (acc->cc->flags & VALKEYCLUSTER_FLAG_DISCONNECTING) { + /* No slot map updates during a cluster client disconnect. */ + return VALKEY_ERR; + } if (ac == NULL) { if (acc->cc->nodes == NULL) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d87f61c..d2b8da5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -268,4 +268,8 @@ if (LIBEVENT_LIBRARY) COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/client-disconnect-test.sh" "$" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") + add_test(NAME client-disconnect-without-slotmap-update-test-async + COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/client-disconnect-without-slotmap-update-test.sh" + "$" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") endif() diff --git a/tests/scripts/client-disconnect-without-slotmap-update-test.sh b/tests/scripts/client-disconnect-without-slotmap-update-test.sh new file mode 100755 index 0000000..a456e47 --- /dev/null +++ b/tests/scripts/client-disconnect-without-slotmap-update-test.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# +# Verify that a client disconnects from known nodes without starting new +# slot map updates. A client shouldn't reconnect or connect to new nodes +# while shutting down. +# +# This testcase starts 2 cluster nodes and the client connects to both. +# The client triggers a disconnect right after a command has been sent, +# which will invoke its callback with a NULL reply. This NULL reply +# should not trigger a slot map update. +# +# Usage: $0 /path/to/clusterclient-binary + +clientprog=${1:-./clusterclient_async} +testname=client-disconnect-without-slotmap-update-test + +# Sync process just waiting for server to be ready to accept connection. +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid1=$! +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid2=$! + +# Start simulated valkey node #1 +timeout 5s ./simulated-valkey.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' & +EXPECT CONNECT +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 6000, ["127.0.0.1", 7401, "nodeid1"]],[6001, 16383, ["127.0.0.1", 7402, "nodeid2"]]] +EXPECT CLOSE + +EXPECT CONNECT +EXPECT ["SET", "bar", "initial"] +SEND +OK + +# Normally a slot map update is expected here, but not during disconnects. + +EXPECT CLOSE +EOF +server1=$! + +# Start simulated valkey node #2 +timeout 5s ./simulated-valkey.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' & +EXPECT CONNECT +EXPECT ["SET", "foo", "initial"] +SEND +OK + +EXPECT ["SET", "foo", "null-reply"] +# The client will invoke callbacks for outstanding requests with a NULL reply. +# Normally this would trigger a slot map update, but not during disconnects. + +EXPECT CLOSE +EOF +server2=$! + +# Wait until both nodes are ready to accept client connections +wait $syncpid1 $syncpid2; + +# Run client +timeout 4s "$clientprog" --connection-events 127.0.0.1:7401 > "$testname.out" <<'EOF' +SET foo initial +SET bar initial + +# Make sure a slot map update is not throttled. +!sleep + +# Send a command just before requesting a client disconnect. +# A NULL reply should not trigger a slot map update after a disconnect. +!async +SET foo null-reply +!disconnect +!sync + +EOF +clientexit=$? + +# Wait for servers to exit +wait $server1; server1exit=$? +wait $server2; server2exit=$? + +# Check exit statuses +if [ $server1exit -ne 0 ]; then + echo "Simulated server #1 exited with status $server1exit" + exit $server1exit +fi +if [ $server2exit -ne 0 ]; then + echo "Simulated server #2 exited with status $server2exit" + exit $server2exit +fi +if [ $clientexit -ne 0 ]; then + echo "$clientprog exited with status $clientexit" + exit $clientexit +fi + +expected="Event: connect to 127.0.0.1:7402 +OK +Event: connect to 127.0.0.1:7401 +OK +Event: disconnect from 127.0.0.1:7401 +error: Timeout +Event: failed to disconnect from 127.0.0.1:7402" + +echo "$expected" | diff -u - "$testname.out" || exit 99 + +# Clean up +rm "$testname.out"