diff --git a/suites/reef/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml b/suites/reef/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
index 8846cdb105..fd951e4437 100644
--- a/suites/reef/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
+++ b/suites/reef/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
@@ -126,7 +126,7 @@ tests:

 # NVMe 4-GW HA Test with mTLS configuration
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -184,7 +184,7 @@ tests:

 # NVMe 4-GW HA Test with mTLS-to-Non-mTLS switch configuration
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -242,7 +242,7 @@ tests:
 # Non-mTLS Tests
 # NVMe 4-GW Single node failure(s)
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -298,7 +298,7 @@ tests:
     polarion-id: CEPH-83589016

 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -385,7 +385,7 @@ tests:

 # 4GW HA Single-sub multinode Failover and failback parallely via ceph orchestrator daemon
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -446,7 +446,7 @@ tests:

 # 4GW Multi node sequential failover-failback
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -507,7 +507,7 @@ tests:

 # 4GW HA 2-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -578,7 +578,7 @@ tests:

 # 4GW HA 4-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -669,7 +669,7 @@ tests:

 # 4GW HA 4-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -762,7 +762,7 @@ tests:

 # 4GW HA 4-subsystems node Failover and failback using power off|on
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -851,7 +851,7 @@ tests:

 # 4GW HA 4-subsystems node Failover and failback using maintanence_mode
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
diff --git a/suites/squid/nvmeof/tier-1_nvmeof_4-nvmeof-gwgroup_2gw_tests.yaml b/suites/squid/nvmeof/tier-1_nvmeof_4-nvmeof-gwgroup_2gw_tests.yaml
index fa6e363552..f1a0d94a70 100644
--- a/suites/squid/nvmeof/tier-1_nvmeof_4-nvmeof-gwgroup_2gw_tests.yaml
+++ b/suites/squid/nvmeof/tier-1_nvmeof_4-nvmeof-gwgroup_2gw_tests.yaml
@@ -5,12 +5,12 @@ tests:
 # Set up the cluster
 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     module: install_prereq.py
     name: install ceph pre-requisites

 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     config:
       verify_cluster_health: true
     steps:
@@ -51,7 +51,7 @@ tests:
     name: deploy cluster

 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     config:
       command: add
       id: client.1
diff --git a/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_ha_tests.yaml b/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_ha_tests.yaml
index e5efbe97d5..e1686e7240 100644
--- a/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_ha_tests.yaml
+++ b/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_ha_tests.yaml
@@ -5,11 +5,11 @@ tests:
 # Set up the cluster
 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     module: install_prereq.py
     name: install ceph pre-requisites

 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     config:
       verify_cluster_health: true
     steps:
@@ -50,7 +50,7 @@ tests:
     name: deploy cluster

 - test:
-    abort-on-fail: false
+    abort-on-fail: true
     config:
       command: add
       id: client.1
diff --git a/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_loadbalancing_tests.yaml b/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_loadbalancing_tests.yaml
index 2bb8d661fa..ecdd9f4e66 100644
--- a/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_loadbalancing_tests.yaml
+++ b/suites/squid/nvmeof/tier-2_nvmeof_1gwgroup_8gwnodes_loadbalancing_tests.yaml
@@ -65,7 +65,7 @@ tests:
     name: configure Ceph client for NVMe tests
     polarion-id: CEPH-83573758

-# 1 GWgroup 4GW 4-subsystems scaledown 1 node
+# 1 GWgroup 4GW 4-subsystems scaledown 2 nodes -> scaleup 2 nodes
 - test:
     abort-on-fail: false
     config:
@@ -125,14 +125,14 @@ tests:
       load_balancing:
         - scale_down: ["node6", "node7"]  # scale down
        - scale_up: ["node6", "node7"]  # scale up
-        - scale_up: ["node10", "node11"]  # new nodes scale up
-    desc: 4GW 1GWgroup 4-subsystems namespace load balancing
+        - scale_up: ["node10", "node11"]  # scale up new nodes
+    desc: 4GW 1GWgroup namespace load balancing
     destroy-cluster: false
     module: test_ceph_nvmeof_loadbalancing.py
-    name: Test NVMeoF 8-GW 1GWgroup 4-sub namespace load balancing
+    name: NVMeoF 4GW 1GWgroup namespaces load balancing
     polarion-id: CEPH-83598717

-# 1 GWgroup 8GW 4-subsystems scaledown from 8 - 2 nodes
+# 1 GWgroup 8GW 4-subsystems scaledown from 8 - 2 nodes and scaleup from 2-8 nodes
 - test:
     abort-on-fail: false
     config:
@@ -196,8 +196,8 @@ tests:
       load_balancing:
         - scale_down: ["node6", "node7", "node8", "node9", "node10", "node11"]  # scale down
         - scale_up: ["node6", "node7", "node8", "node9", "node10", "node11"]  # scale up
-    desc: 8GW 1GWgroup 4-subsystems namespace load balancing
+    desc: 8GW 1GWgroup namespaces load balancing
     destroy-cluster: false
     module: test_ceph_nvmeof_loadbalancing.py
-    name: Test NVMeoF 8-GW 1GWgroup 4-sub namespace load balancing
+    name: NVMeoF 8-GW 1GWgroup namespaces load balancing
     polarion-id: CEPH-83598716
diff --git a/suites/squid/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml b/suites/squid/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
index d6c71668ff..367f83b957 100644
--- a/suites/squid/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
+++ b/suites/squid/nvmeof/tier-2_nvmeof_4nodes_gateway_ha_tests.yaml
@@ -76,7 +76,7 @@ tests:
     desc: NVMe services with mTLS deployment using spec file.
     module: test_cephadm.py
     polarion-id: CEPH-83594617
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       steps:
         - config:
@@ -127,7 +127,7 @@ tests:
             - "60"

 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -185,7 +185,7 @@ tests:
     polarion-id: CEPH-83594616

 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       do_not_create_image: true
@@ -244,7 +244,7 @@ tests:
 # Non-mTLS Tests
 # NVMe 4-GW Single node failure(s)
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -301,7 +301,7 @@ tests:
     polarion-id: CEPH-83589016

 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -389,7 +389,7 @@ tests:

 # 4GW HA Single-sub multinode Failover and failback parallely via ceph orchestrator daemon
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -451,7 +451,7 @@ tests:

 # 4GW Multi node sequential failover-failback
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -513,7 +513,7 @@ tests:

 # 4GW HA 2-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -585,7 +585,7 @@ tests:

 # 4GW HA 4-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -677,7 +677,7 @@ tests:

 # 4GW HA 4-subsystems multinode Failover and failback parallely
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -771,7 +771,7 @@ tests:

 # 4GW HA 4-subsystems node Failover and failback using power off|on
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
@@ -861,7 +861,7 @@ tests:

 # 4GW HA 4-subsystems node Failover and failback using maintanence_mode
 - test:
-    abort-on-fail: true
+    abort-on-fail: false
     config:
       rbd_pool: rbd
       gw_group: gw_group1
diff --git a/tests/nvmeof/test_ceph_nvmeof_loadbalancing.py b/tests/nvmeof/test_ceph_nvmeof_loadbalancing.py
index 2f8d942cac..aca84f6243 100644
--- a/tests/nvmeof/test_ceph_nvmeof_loadbalancing.py
+++ b/tests/nvmeof/test_ceph_nvmeof_loadbalancing.py
@@ -4,6 +4,8 @@
 """

+import time
+from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy

 from ceph.ceph import Ceph
@@ -12,10 +12,8 @@
 from ceph.parallel import parallel
 from ceph.utils import get_node_by_id
 from tests.nvmeof.workflows.ha import HighAvailability
-from tests.nvmeof.workflows.nvme_utils import (
-    delete_nvme_service,
-    deploy_nvme_service,
-)
+from tests.nvmeof.workflows.nvme_gateway import NVMeGateway
+from tests.nvmeof.workflows.nvme_utils import delete_nvme_service, deploy_nvme_service
 from tests.rbd.rbd_utils import initial_rbd_config
 from utility.log import Log
 from utility.utils import generate_unique_id
@@ -42,6 +42,34 @@ def configure_listeners(ha_obj, nodes, config):
     return lb_group_ids


+def configure_namespaces(nvmegwcli, config, lb_groups, sub_args, pool, ceph_cluster):
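+    """Create RBD-backed namespaces, honoring per-bdev load-balancing groups."""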
+    bdev_configs = config["bdevs"]
+    if isinstance(config["bdevs"], dict):
+        bdev_configs = [config["bdevs"]]
+    for bdev_cfg in bdev_configs:
+        name = generate_unique_id(length=4)
+        namespace_args = {
+            **sub_args,
+            **{
+                "rbd-pool": pool,
+                "rbd-create-image": True,
+                "size": bdev_cfg["size"],
+            },
+        }
+        with parallel() as p:
+            # Create namespace in gateway
+            for num in range(bdev_cfg["count"]):
+                ns_args = deepcopy(namespace_args)
+                ns_args["rbd-image"] = f"{name}-image{num}"
+                if bdev_cfg.get("lb_group"):
+                    lbgid = lb_groups[
+                        get_node_by_id(ceph_cluster, bdev_cfg["lb_group"]).hostname
+                    ]
+                    ns_args["load-balancing-group"] = lbgid
+                ns_args = {"args": ns_args}
+                p.spawn(nvmegwcli.namespace.add, **ns_args)
+
+
 def configure_subsystems(pool, ha, config):
     """Configure Ceph-NVMEoF Subsystems."""
     sub_args = {"subsystem": config["nqn"]}
@@ -73,10 +101,9 @@
     lb_groups = configure_listeners(ha, listeners, config)

     # Add Host access
-    if config.get("allow_host"):
-        nvmegwcli.host.add(
-            **{"args": {**sub_args, **{"host": repr(config["allow_host"])}}}
-        )
+    nvmegwcli.host.add(
+        **{"args": {**sub_args, **{"host": repr(config.get("allow_host", "*"))}}}
+    )

     if config.get("hosts"):
         for host in config["hosts"]:
@@ -87,31 +114,7 @@
     # Add Namespaces
     if config.get("bdevs"):
-        bdev_configs = config["bdevs"]
-        if isinstance(config["bdevs"], dict):
-            bdev_configs = [config["bdevs"]]
-        for bdev_cfg in bdev_configs:
-            name = generate_unique_id(length=4)
-            namespace_args = {
-                **sub_args,
-                **{
-                    "rbd-pool": pool,
-                    "rbd-create-image": True,
-                    "size": bdev_cfg["size"],
-                },
-            }
-            with parallel() as p:
-                # Create namespace in gateway
-                for num in range(bdev_cfg["count"]):
-                    ns_args = deepcopy(namespace_args)
-                    ns_args["rbd-image"] = f"{name}-image{num}"
-                    if bdev_cfg.get("lb_group"):
-                        lbgid = lb_groups[
-                            get_node_by_id(ceph_cluster, bdev_cfg["lb_group"]).hostname
-                        ]
-                        ns_args["load-balancing-group"] = lbgid
-                    ns_args = {"args": ns_args}
-                    p.spawn(nvmegwcli.namespace.add, **ns_args)
+        configure_namespaces(nvmegwcli, config, lb_groups, sub_args, pool, ceph_cluster)


 def disconnect_initiator(ceph_cluster, node):
@@ -195,6 +198,9 @@ def run(ceph_cluster: Ceph, **kwargs) -> int:
     config = kwargs["config"]
     rbd_pool = config["rbd_pool"]
     rbd_obj = initial_rbd_config(**kwargs)["rbd_reppool"]
+    initiators = config["initiators"]
+    io_tasks = []
+    executor = ThreadPoolExecutor()

     overrides = kwargs.get("test_data", {}).get("custom-config")
     for key, value in dict(item.split("=") for item in overrides).items():
@@ -217,7 +223,61 @@
                 p.spawn(configure_subsystems, rbd_pool, ha, subsys_args)

         # Initiate scale-down and scale-up
-        ha.run_ns_autoloadbalancing_operations()
+        if config.get("load_balancing"):
+            for lb_config in config.get("load_balancing"):
+                # Scale down
+                if lb_config.get("scale_down"):
+                    gateway_nodes = lb_config["scale_down"]
+                    # Prepare FIO Execution
+                    namespaces = ha.fetch_namespaces(ha.gateways[0])
+                    ha.prepare_io_execution(initiators)
+
+                    # Check for targets at clients
+                    ha.compare_client_namespace([i["uuid"] for i in namespaces])
+
+                    # Start IO Execution
+                    LOG.info("Initiating IO before scaling operations")
+                    for initiator in ha.clients:
+                        io_tasks.append(executor.submit(initiator.start_fio))
+                    time.sleep(20)  # time sleep for IO to Kick-in
+
+                    ha.scale_down(gateway_nodes)
+
+                # Scale up
+                if lb_config.get("scale_up"):
+                    scaleup_nodes = lb_config["scale_up"]
+                    gateway_nodes = config["gw_nodes"]
+
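+                    # Namespaces are enumerated through an existing gateway (the last
+                    # configured gw node), since the scale-up nodes are not deployed yet.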
+                    # Prepare FIO Execution
+                    gw_node = get_node_by_id(ceph_cluster, config["gw_nodes"][-1])
+                    gateways = NVMeGateway(gw_node, ha.mtls)
+                    namespaces = ha.fetch_namespaces(gateways)
+                    ha.prepare_io_execution(initiators)
+
+                    # Check for targets at clients
+                    ha.compare_client_namespace([i["uuid"] for i in namespaces])
+
+                    # Start IO Execution
+                    LOG.info("Initiating IO before scale up")
+                    for initiator in ha.clients:
+                        io_tasks.append(executor.submit(initiator.start_fio))
+                    time.sleep(20)  # time sleep for IO to Kick-in
+
+                    # Perform scale-up of new nodes
+                    if not all(
+                        [node in set(config["gw_nodes"]) for node in scaleup_nodes]
+                    ):
+                        # Perform scale up
+                        namespaces = ha.scale_up(scaleup_nodes)
+                        for scaleup_node in scaleup_nodes:
+                            gw_node = get_node_by_id(ceph_cluster, scaleup_node)
+                            ha.gateways.append(NVMeGateway(gw_node, ha.mtls))
+                            for subsys_args in config["subsystems"]:
+                                configure_listeners(ha, [scaleup_node], subsys_args)
+                        ha.validate_scaleup(scaleup_nodes, namespaces)
+                    else:
+                        namespaces = ha.scale_up(scaleup_nodes)
+                        ha.validate_scaleup(scaleup_nodes, namespaces)

         return 0
     except Exception as err:
@@ -227,3 +287,6 @@
     finally:
         if config.get("cleanup"):
             teardown(ceph_cluster, rbd_obj, config)
+        if io_tasks:
+            LOG.info("Waiting for completion of IOs.")
+            executor.shutdown(wait=True, cancel_futures=True)
diff --git a/tests/nvmeof/workflows/ha.py b/tests/nvmeof/workflows/ha.py
index 537b174b79..a51f41b6e8 100644
--- a/tests/nvmeof/workflows/ha.py
+++ b/tests/nvmeof/workflows/ha.py
@@ -471,20 +471,32 @@ def scale_down(self, gateway_nodes):
         - List out namespaces associated with the scaled down Gateways using ANA group ids.
         - Check for 5 Consecutive times for the increments in write/read to validate IO continuation.
         """
-        fail_gws, _ = self.catogorize(gateway_nodes)
         start_counter = float()
         start_time = str()
         end_counter = float()
         end_time = str()
-        gwnodes_to_be_deployed = list(set(self.config["gw_nodes"]) - set(gateway_nodes))
-        self.config["gw_nodes"] = gwnodes_to_be_deployed
         LOG.info(f"{gateway_nodes}: Scaling down NVMe Service")

+        if not isinstance(gateway_nodes, list):
+            gateway_nodes = [gateway_nodes]
+
+        scaledown_gws, operational_gws = self.catogorize(gateway_nodes)
+        ana_ids = [gw.ana_group_id for gw in scaledown_gws]
+        gateway = operational_gws[0]
+
+        # Validate IO and scale operation
+        old_namespaces = self.fetch_namespaces(gateway, ana_ids)
+        self.validate_io(old_namespaces)
+
+        # Scale down
+        gwnodes_to_be_deployed = list(set(self.config["gw_nodes"]) - set(gateway_nodes))
+        self.config["gw_nodes"] = gwnodes_to_be_deployed
         deploy_nvme_service(self.cluster, self.config)
+
         start_counter, start_time = get_current_timestamp()
-        for gateway in fail_gws:
+        for gateway in scaledown_gws:
             hostname = gateway.hostname
+
             # Wait until 60 seconds
             for w in WaitUntil():
                 # Check for gateway unavailability
@@ -512,44 +524,41 @@
         LOG.info(
             f"[ {hostname} ] Total time taken to scale down - {end_counter - start_counter} seconds"
         )
-        return {
+
+        result = {
             "scale-down-start-time": start_time,
             "scale-down-end-time": end_time,
             "scale-down-start-counter-time": start_counter,
             "scale-down-end-counter-time": end_counter,
         }
+        LOG.info(log_json_dump(result))
+        self.validate_io(old_namespaces)
+        return result

-    def scale_up(self, gateway_nodes):
-        """Scaling up of the NVMeoF Gateways.
-
-        Initiate scale-up
-        - Spin up the new gateways.
-        - Validate the ANA states of new GWs are optimized.
-
-        Post scale-up Validation
+    def validate_scaleup(self, scaleup_nodes, namespaces):
+        """Validate gateway availability and IO continuation after a scale-up.
         - List out namespaces associated with the new Gateways using ANA group ids.
         - Check for 5 Consecutive times for the increments in write/read to validate IO continuation.
+
+        Args:
+            scaleup_nodes (list): A list of gateway nodes to be scaled up.
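+            namespaces (list): Namespaces to validate for IO continuation.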
""" - new_gws = [] start_counter = float() start_time = str() end_counter = float() end_time = str() - gwnodes_to_be_deployed = list(set(self.config["gw_nodes"] + gateway_nodes)) - self.config["gw_nodes"] = gwnodes_to_be_deployed - LOG.info(f"{gateway_nodes}: Scaling up NVMe Service") + new_gws = [] - # Scale up - deploy_nvme_service(self.cluster, self.config) - for gateway_node in gateway_nodes: + for gateway_node in scaleup_nodes: gw = get_node_by_id(self.cluster, gateway_node) new_gws.append(NVMeGateway(gw, self.mtls)) + start_counter, start_time = get_current_timestamp() for gateway in new_gws: hostname = gateway.hostname # Wait until 60 seconds - for w in WaitUntil(): + for w in WaitUntil(timeout=60): # Check for gateway availability if self.check_gateway_availability(gateway.ana_group_id): LOG.info(f"[ {gateway} ] NVMeofGW service is AVAILABLE.") @@ -568,18 +577,55 @@ def scale_up(self, gateway_nodes): if w.expired: raise TimeoutError( - f"[ {hostname} ] Scale up of NVMeofGW service failed after 60s timeout.." + f"[ {hostname} ] Scale up of NVMeofGW service failed after 120s timeout.." ) LOG.info( f"[ {hostname} ] Total time taken to scale up - {end_counter - start_counter} seconds" ) - return { + result = { "scale-up-start-time": start_time, "scale-up-end-time": end_time, "scale-up-start-counter-time": start_counter, "scale-up-end-counter-time": end_counter, } + LOG.info(log_json_dump(result)) + self.validate_io(namespaces) + result["new_gws"] = new_gws + result["scaleup_nodes"] = scaleup_nodes + return result + + def scale_up(self, scaleup_nodes): + """Scaling up of the NVMeoF Gateways. + + Initiate scale-up + - Spin up the new gateways. + - Validate the ANA states of new GWs are optimized. + + Post scale-up Validation + - List out namespaces associated with the new Gateways using ANA group ids. + - Check for 5 Consecutive times for the increments in write/read to validate IO continuation. + """ + + LOG.info(f"{scaleup_nodes}: Scaling up NVMe Service") + + if not isinstance(scaleup_nodes, list): + scaleup_nodes = [scaleup_nodes] + + old_gws = [self.check_gateway(gw_id) for gw_id in self.config["gw_nodes"]] + ana_ids = [gw.ana_group_id for gw in old_gws] + gateway = old_gws[0] + + # Validate IO before scale up operation + old_namespaces = self.fetch_namespaces(gateway, ana_ids) + self.validate_io(old_namespaces) + + # Scale up + gwnodes_to_be_deployed = list(set(self.config["gw_nodes"] + scaleup_nodes)) + self.config["gw_nodes"] = gwnodes_to_be_deployed + deploy_nvme_service(self.cluster, self.config) + + return old_namespaces def failover(self, gateway, fail_tool): """HA Failover on the NVMeoF Gateways. 
@@ -827,94 +873,6 @@ def validate_incremetal_io(write_samples):

         LOG.info("IO Validation is Successfull on all RBD images..")

-    def execute_scale_operation(self, gateway_nodes, operation="down"):
-        """Handles scale up or scale down with IO validation."""
-        LOG.info(f"Executing scale {operation} on {gateway_nodes}")
-
-        if not isinstance(gateway_nodes, list):
-            gateway_nodes = [gateway_nodes]
-
-        if operation == "down":
-            scaledown_gws, operational_gws = self.catogorize(gateway_nodes)
-            ana_ids = [gw.ana_group_id for gw in scaledown_gws]
-            gateway = operational_gws[0]
-        else:
-            old_gws = [self.check_gateway(gw_id) for gw_id in self.config["gw_nodes"]]
-            ana_ids = [gw.ana_group_id for gw in old_gws]
-            gateway = old_gws[0]
-
-        # Validate IO and scale operation
-        namespaces = self.fetch_namespaces(gateway, ana_ids)
-        self.validate_io(namespaces)
-
-        # Perform the scale operation
-        result = (
-            self.scale_down(gateway_nodes)
-            if operation == "down"
-            else self.scale_up(gateway_nodes)
-        )
-        LOG.info(log_json_dump(result))
-        self.validate_io(namespaces)
-
-    def run_ns_autoloadbalancing_operations(self):
-        """Run Scale Down and Scale Up operations with IO validation."""
-        initiators = self.config["initiators"]
-        executor = ThreadPoolExecutor()
-        io_tasks = []
-
-        try:
-            # Execute scale down and scale up operations
-            if self.config.get("load_balancing"):
-                for lb_config in self.config.get("load_balancing"):
-                    # Scale down
-                    if lb_config.get("scale_down"):
-                        gateway_nodes = lb_config["scale_down"]
-                        # Prepare FIO Execution
-                        namespaces = self.fetch_namespaces(self.gateways[0])
-                        self.prepare_io_execution(initiators)
-
-                        # Check for targets at clients
-                        self.compare_client_namespace([i["uuid"] for i in namespaces])
-
-                        # Start IO Execution
-                        LOG.info("Initiating IO before scaling operations")
-                        for initiator in self.clients:
-                            io_tasks.append(executor.submit(initiator.start_fio))
-                        time.sleep(20)  # time sleep for IO to Kick-in
-
-                        self.execute_scale_operation(gateway_nodes, operation="down")
-
-                    # Scale up
-                    if lb_config.get("scale_up"):
-                        gateway_nodes = lb_config["scale_up"]
-
-                        # Prepare FIO Execution
-                        gw_node = get_node_by_id(
-                            self.cluster, self.config["gw_nodes"][-1]
-                        )
-                        gateways = NVMeGateway(gw_node, self.mtls)
-                        namespaces = self.fetch_namespaces(gateways)
-                        self.prepare_io_execution(initiators)
-
-                        # Check for targets at clients
-                        self.compare_client_namespace([i["uuid"] for i in namespaces])
-
-                        # Start IO Execution
-                        LOG.info("Initiating IO before scale up")
-                        for initiator in self.clients:
-                            io_tasks.append(executor.submit(initiator.start_fio))
-                        time.sleep(20)  # time sleep for IO to Kick-in
-
-                        self.execute_scale_operation(gateway_nodes, operation="up")
-
-        except BaseException as err:
-            raise Exception(err)
-
-        finally:
-            if io_tasks:
-                LOG.info("Waiting for completion of IOs.")
-                executor.shutdown(wait=True, cancel_futures=True)
-
     def run(self):
         """Execute the HA failover and failback with IO validation."""
         fail_methods = self.config["fault-injection-methods"]
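
Note: the scale_down() and validate_scaleup() loops above poll gateway state
through the framework's WaitUntil helper. A minimal, self-contained sketch of
the polling contract they assume follows; it is not the framework
implementation, and the check_gateway_availability() stub and the interval
default are illustrative assumptions only.

    import time


    class WaitUntil:
        def __init__(self, timeout=60, interval=5):
            self.timeout = timeout
            self.interval = interval
            self.expired = False

        def __iter__(self):
            # Yield until the deadline passes; flag `expired` once exhausted.
            deadline = time.monotonic() + self.timeout
            while time.monotonic() < deadline:
                yield self
                time.sleep(self.interval)
            self.expired = True


    _attempts = {"count": 0}


    def check_gateway_availability(ana_group_id):
        # Hypothetical probe standing in for the HighAvailability method,
        # which inspects the NVMe gateway state for the given ANA group.
        _attempts["count"] += 1
        return _attempts["count"] >= 3


    for w in WaitUntil(timeout=60, interval=1):
        if check_gateway_availability(1):
            break
    if w.expired:
        raise TimeoutError("Scale up of NVMeofGW service failed after 60s timeout..")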