From 365919b1d90ba1ffbcda7e1a05e4b10187edfab7 Mon Sep 17 00:00:00 2001
From: Dmitry Savitskiy
Date: Tue, 27 Jun 2023 22:33:28 +0300
Subject: [PATCH] fix(nexus): fixing child retire during rebuild

A Python test for retire-during rebuild added

Signed-off-by: Dmitry Savitskiy
---
 mayastor/src/bdev/nexus/nexus_channel.rs      |  18 ++-
 scripts/pytest-tests.sh                       |   2 +-
 test/python/common/hdl.py                     |  11 +-
 test/python/tests/nexus/test_nexus_rebuild.py | 138 ++++++++++++++++++
 4 files changed, 162 insertions(+), 7 deletions(-)
 create mode 100644 test/python/tests/nexus/test_nexus_rebuild.py

diff --git a/mayastor/src/bdev/nexus/nexus_channel.rs b/mayastor/src/bdev/nexus/nexus_channel.rs
index e5b2db7304..2c79636b4b 100644
--- a/mayastor/src/bdev/nexus/nexus_channel.rs
+++ b/mayastor/src/bdev/nexus/nexus_channel.rs
@@ -52,7 +52,12 @@ pub(crate) fn fault_nexus_child(nexus: Pin<&mut Nexus>, name: &str) -> bool {
     nexus
         .children
         .iter()
-        .filter(|c| c.state() == ChildState::Open)
+        .filter(|c| {
+            matches!(
+                c.state(),
+                ChildState::Open | ChildState::Faulted(Reason::OutOfSync)
+            )
+        })
         .filter(|c| {
             // If there were previous retires, we do not have a reference
             // to a BlockDevice. We do however, know it can't be the device
@@ -65,11 +70,16 @@ pub(crate) fn fault_nexus_child(nexus: Pin<&mut Nexus>, name: &str) -> bool {
             }
         })
         .any(|c| {
-            Ok(ChildState::Open)
+            Ok(ChildState::Faulted(Reason::OutOfSync))
                 == c.state.compare_exchange(
-                    ChildState::Open,
-                    ChildState::Faulted(Reason::IoError),
+                    ChildState::Faulted(Reason::OutOfSync),
+                    ChildState::Faulted(Reason::RebuildFailed),
                 )
+                || Ok(ChildState::Open)
+                    == c.state.compare_exchange(
+                        ChildState::Open,
+                        ChildState::Faulted(Reason::IoError),
+                    )
         })
 }
 
diff --git a/scripts/pytest-tests.sh b/scripts/pytest-tests.sh
index 2f003cfce3..21dc36fd66 100755
--- a/scripts/pytest-tests.sh
+++ b/scripts/pytest-tests.sh
@@ -26,7 +26,7 @@ function run_tests()
       (
         set -x
         base=$(dirname "$name")
-        python -m pytest --tc-file='test_config.ini' --docker-compose="$base" "$name"
+        python -m pytest --tc-file='test_config.ini' --docker-compose="$base" "$name" -svv
       )
     fi
   done
diff --git a/test/python/common/hdl.py b/test/python/common/hdl.py
index ed193d3459..f401897a25 100644
--- a/test/python/common/hdl.py
+++ b/test/python/common/hdl.py
@@ -1,4 +1,5 @@
 """Common code that represents a mayastor handle."""
+from urllib.parse import urlparse
 import mayastor_pb2 as pb
 import grpc
 import mayastor_pb2_grpc as rpc
@@ -132,8 +133,14 @@ def replica_list_v2(self):
 
     def nexus_create(self, uuid, size, children):
         """Create a nexus with the given uuid and size. The children should
         be an array of nvmf URIs."""
+        children_ = []
+        for child in children:
+            u = urlparse(child)
+            host = u.hostname
+            if host != self.ip_v4:
+                children_.append(child)
         return self.ms.CreateNexus(
-            pb.CreateNexusRequest(uuid=str(uuid), size=size, children=children)
+            pb.CreateNexusRequest(uuid=str(uuid), size=size, children=children_)
         )
 
     def nexus_create_v2(
@@ -178,7 +185,7 @@ def nexus_list_v2(self):
         """List all the nexus devices, with separate name and uuid."""
         return self.ms.ListNexusV2(pb.Null()).nexus_list
 
-    def nexus_add_replica(self, uuid, uri, norebuild):
+    def nexus_add_replica(self, uuid, uri, norebuild=False):
         """Add a new replica to the nexus"""
         return self.ms.AddChildNexus(
             pb.AddChildNexusRequest(uuid=uuid, uri=uri, norebuild=norebuild)
diff --git a/test/python/tests/nexus/test_nexus_rebuild.py b/test/python/tests/nexus/test_nexus_rebuild.py
new file mode 100644
index 0000000000..276f0918e7
--- /dev/null
+++ b/test/python/tests/nexus/test_nexus_rebuild.py
@@ -0,0 +1,138 @@
+"""Nexus rebuild test: retire a child while it is being rebuilt."""
+from common.hdl import MayastorHandle
+from common.command import run_cmd, run_cmd_async
+from common.nvme import nvme_connect, nvme_disconnect
+from common.fio import Fio
+from common.fio_spdk import FioSpdk
+from common.mayastor import containers, mayastors, create_temp_files, check_size
+import pytest
+import asyncio
+import uuid as guid
+import time
+import subprocess
+import mayastor_pb2 as pb
+
+NEXUS_COUNT = 10
+NEXUS_SIZE = 500 * 1024 * 1024
+REPL_SIZE = NEXUS_SIZE
+POOL_SIZE = REPL_SIZE * NEXUS_COUNT + 100 * 1024 * 1024
+
+
+@pytest.fixture
+def local_files(mayastors):
+    """Create one pool backing file per node; delete them on teardown."""
+    files = []
+    for name, ms in mayastors.items():
+        path = f"/tmp/disk-{name}.img"
+        pool_size_mb = int(POOL_SIZE / 1024 / 1024)
+        subprocess.run(
+            ["sudo", "sh", "-c", f"rm -f '{path}'; truncate -s {pool_size_mb}M '{path}'"],
+            check=True,
+        )
+        files.append(path)
+
+    yield
+    for path in files:
+        subprocess.run(["sudo", "rm", "-f", path], check=True)
+
+
+@pytest.fixture
+def create_replicas_on_all_nodes(local_files, mayastors, create_temp_files):
+    """Create a pool on every node and NEXUS_COUNT replicas in each pool."""
+    uuids = []
+
+    for name, ms in mayastors.items():
+        ms.pool_create(name, f"aio:///tmp/disk-{name}.img")
+        # verify we have zero replicas
+        assert len(ms.replica_list().replicas) == 0
+
+    for i in range(NEXUS_COUNT):
+        uuid = guid.uuid4()
+        for name, ms in mayastors.items():
+            before = ms.pool_list()
+            ms.replica_create(name, uuid, REPL_SIZE)
+            after = ms.pool_list()
+            uuids.append(uuid)
+
+    yield uuids
+
+
+@pytest.fixture
+def create_nexuses(mayastors, create_replicas_on_all_nodes):
+    """Create and publish nexuses on ms0 over the replicas of ms1, ms2, ms3.
+
+    The i-th replica listed by each of the three nodes goes into one nexus.
+    """
+    nexuses = []
+    nexuses_uris = []
+
+    uris = [
+        [replica.uri for replica in mayastors.get(node).replica_list().replicas]
+        for node in ["ms1", "ms2", "ms3"]
+    ]
+
+    ms = mayastors.get("ms0")
+    for children in zip(*uris):
+        uuid = guid.uuid4()
+        nexus = ms.nexus_create(uuid, NEXUS_SIZE, list(children))
+        nexuses.append(nexus)
+        nexuses_uris.append(ms.nexus_publish(uuid))
+
+    yield nexuses
+
+
+@pytest.mark.parametrize("times", range(10))
+def test_rebuild_failure(containers, mayastors, times, create_nexuses):
+    """Stop the node of a child under rebuild; every rebuild job must end.
+
+    ms3 is restarted, the children found faulted afterwards are re-added to
+    start rebuilds, and ms3 is stopped again while rebuilds are reported.
+    """
+    ms0 = mayastors.get("ms0")
+    ms3 = mayastors.get("ms3")
+
+    # Restart container with replica #3 (ms3).
+    node3 = containers.get("ms3")
+    node3.stop()
+    time.sleep(5)
+    node3.start()
+
+    # Reconnect ms3, and import the existing pool.
+    ms3.reconnect()
+    ms3.pool_create("ms3", "aio:///tmp/disk-ms3.img")
+    time.sleep(1)
+
+    # Add the replicas to the nexuses for rebuild.
+    for nexus in ms0.nexus_list():
+        faulted = [c for c in nexus.children if c.state == pb.CHILD_FAULTED]
+        # Nothing to re-add if no child faulted; indexing [0] would raise.
+        if not faulted or nexus.state == pb.NEXUS_FAULTED:
+            continue
+        child = faulted[0]
+        try:
+            ms0.nexus_remove_replica(nexus.uuid, child.uri)
+            ms0.nexus_add_replica(nexus.uuid, child.uri)
+        except Exception as err:
+            # Best effort: report the failure and carry on with the next nexus.
+            print(f"Failed to remove/re-add child {child.uri} on {nexus}: {err}")
+
+    time.sleep(5)
+
+    rebuilds = 0
+    for nexus in ms0.nexus_list():
+        for child in nexus.children:
+            if child.rebuild_progress > -1:
+                rebuilds += 1
+                print("nexus", nexus.uuid, "rebuilding", child.uri, f"{child.rebuild_progress}")
+
+    assert rebuilds > 0
+
+    # Stop ms3 again. Rebuild jobs in progress must terminate.
+    node3.stop()
+
+    time.sleep(30)
+
+    # All rebuild jobs must finish.
+    for nexus in ms0.nexus_list():
+        for child in nexus.children:
+            assert child.rebuild_progress == -1