From 9355066e04c655fd4b594844b8e9ca7036782bfc Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Sun, 15 Jan 2023 15:01:14 +0100 Subject: [PATCH 001/169] dev branch is now v1.5.x --- src/cnaas_nms/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/version.py b/src/cnaas_nms/version.py index e8bc7353..3a8d0fef 100644 --- a/src/cnaas_nms/version.py +++ b/src/cnaas_nms/version.py @@ -1,3 +1,3 @@ -__version__ = "1.4.0b3" +__version__ = "1.5.0a1" __version_info__ = tuple([field for field in __version__.split(".")]) __api_version__ = "v1.0" From a8e1af399d41296e87c6202d43a93d03e7b542a5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 09:44:16 +0100 Subject: [PATCH 002/169] rebase with black formating --- src/cnaas_nms/devicehandler/sync_devices.py | 29 ++++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index bb87ebd9..0d39f879 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -5,7 +5,7 @@ from typing import List, Optional import yaml -from nornir.core.task import MultiResult +from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get from nornir_utils.plugins.functions import print_result @@ -343,6 +343,21 @@ def populate_device_vars( return device_variables +def napalm_configure_confirmed(task, dry_run=None, configuration=None, replace=None): + n_device = task.host.get_connection("napalm") + n_device.load_replace_candidate(config=configuration) + diff = n_device.compare_config() + if diff: + n_device.commit_config(revert_in=300) + if n_device.has_pending_commit(): + n_device.confirm_commit() + else: + n_device.discard_config() + else: + n_device.discard_config() + return Result(host=task.host, diff=diff, 
changed=len(diff) > 0) + + def push_sync_device( task, dry_run: bool = True, @@ -404,12 +419,12 @@ def push_sync_device( ) task.host.open_connection("napalm", configuration=task.nornir.config) + if dry_run: + run_task = napalm_configure + else: + run_task = napalm_configure_confirmed task.run( - task=napalm_configure, - name="Sync device config", - replace=True, - configuration=task.host["config"], - dry_run=dry_run, + task=run_task, name="Sync device config", replace=True, configuration=task.host["config"], dry_run=dry_run ) task.host.close_connection("napalm") @@ -543,7 +558,7 @@ def sync_devices( of sync. Args: - hostname: Specify a single host by hostname to synchronize + hostnames: Specify a single host by hostname to synchronize device_type: Specify a device type to synchronize group: Specify a group of devices to synchronize dry_run: Don't commit generated config to device From ef098069134bdf56a4698e79980e38d52c35b31c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 15:24:59 +0100 Subject: [PATCH 003/169] Pass different args to napalm_config_X depending on what task function, so we can get job_id into the confirmed version --- src/cnaas_nms/devicehandler/sync_devices.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0d39f879..0f908ac1 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -11,7 +11,7 @@ from nornir_utils.plugins.functions import print_result import cnaas_nms.db.helper -from cnaas_nms.app_settings import app_settings +from cnaas_nms.app_settings import api_settings, app_settings from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.device_vars import expand_interface_settings from cnaas_nms.db.git import RepoStructureException @@ -419,13 +419,21 @@ def push_sync_device( ) 
task.host.open_connection("napalm", configuration=task.nornir.config) + task_args = { + "name": "Sync device config", + "replace": True, + "configuration": task.host["config"], + "dry_run": dry_run, + "commit_message": "Job id {}".format(job_id), + } if dry_run: - run_task = napalm_configure + task_args["task"] = napalm_configure + elif api_settings.COMMIT_CONFIRMED_MODE == 0: + task_args["task"] = napalm_configure else: - run_task = napalm_configure_confirmed - task.run( - task=run_task, name="Sync device config", replace=True, configuration=task.host["config"], dry_run=dry_run - ) + task_args["task"] = napalm_configure_confirmed + task_args["job_id"] = job_id + task.run(**task_args) task.host.close_connection("napalm") if task.results[1].diff: From 01b7304d70aba5b3b38cdc0ba27810863e542547 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 15:25:59 +0100 Subject: [PATCH 004/169] Begin work on commit confirmed with 3 modes, 0=no confirm, 1=per device confirm, 2=next job id with confirm (platform dependent?) 
--- src/cnaas_nms/app_settings.py | 4 +++ src/cnaas_nms/devicehandler/sync_devices.py | 39 +++++++++++++++++---- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 2dc48e6a..5b7404f7 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -49,6 +49,8 @@ class ApiSettings(BaseSettings): GLOBAL_UNIQUE_VLANS: bool = True INIT_MGMT_TIMEOUT: int = 30 MGMTDOMAIN_RESERVED_COUNT: int = 5 + COMMIT_CONFIRMED_MODE: int = 1 + COMMIT_CONFIRMED_TIMEOUT: int = 300 def construct_api_settings() -> ApiSettings: @@ -76,6 +78,8 @@ def construct_api_settings() -> ApiSettings: GLOBAL_UNIQUE_VLANS=config.get("global_unique_vlans", True), INIT_MGMT_TIMEOUT=config.get("init_mgmt_timeout", 30), MGMTDOMAIN_RESERVED_COUNT=config.get("mgmtdomain_reserved_count", 5), + COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), + COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), ) else: return ApiSettings() diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0f908ac1..c4494add 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -5,6 +5,7 @@ from typing import List, Optional import yaml +from napalm.eos import EOSDriver as NapalmEOSDriver from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get @@ -343,21 +344,45 @@ def populate_device_vars( return device_variables -def napalm_configure_confirmed(task, dry_run=None, configuration=None, replace=None): - n_device = task.host.get_connection("napalm") +def napalm_configure_confirmed( + task, dry_run=None, configuration=None, replace=None, commit_message: str = "", job_id: int = 0 +): + """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" + logger 
= get_logger() + n_device = task.host.get_connection("napalm", task.nornir.config) n_device.load_replace_candidate(config=configuration) diff = n_device.compare_config() if diff: - n_device.commit_config(revert_in=300) - if n_device.has_pending_commit(): - n_device.confirm_commit() - else: - n_device.discard_config() + n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) + mode_2_supported = False + if api_settings.COMMIT_CONFIRMED_MODE == 2: + if isinstance(n_device, NapalmEOSDriver): + mode_2_supported = True + n_device.session_config = "job{}".format(job_id) + else: + logger.warn( + f"commit_confirmed_mode is set to 2, but it's unsupported for device OS '{task.host.platform}'. " + f"Falling back to mode 1 for device: {task.host.name}." + ) + + if api_settings.COMMIT_CONFIRMED_MODE == 1 or not mode_2_supported: + if n_device.has_pending_commit(): + n_device.confirm_commit() + else: + n_device.discard_config() else: n_device.discard_config() return Result(host=task.host, diff=diff, changed=len(diff) > 0) +def napalm_confirm_commit(task, job_id: int = 0): + """Confirm a previous pending configure session""" + n_device = task.host.get_connection("napalm") + if isinstance(n_device, NapalmEOSDriver): + n_device.session_config = "job{}".format(job_id) + n_device.confirm_commit() + + def push_sync_device( task, dry_run: bool = True, From 0da30be404815440d10c51fdaafff9a95cf7b0f0 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 11 Jan 2023 15:49:35 +0100 Subject: [PATCH 005/169] break out parts of sync_device into separate function. add job function confirm_devices to run as second job after syncto, to confirm all devices. 
works with Arista if EOS-driver is changed to not discard changes at close() --- src/cnaas_nms/devicehandler/sync_devices.py | 144 ++++++++++++++++---- 1 file changed, 118 insertions(+), 26 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index c4494add..f78cff32 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -2,10 +2,11 @@ import os from hashlib import sha256 from ipaddress import IPv4Address, IPv4Interface -from typing import List, Optional +from typing import List, Optional, Tuple import yaml from napalm.eos import EOSDriver as NapalmEOSDriver +from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get @@ -350,6 +351,8 @@ def napalm_configure_confirmed( """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() n_device = task.host.get_connection("napalm", task.nornir.config) + if isinstance(n_device, NapalmEOSDriver): + n_device.config_session = "job{}".format(job_id) n_device.load_replace_candidate(config=configuration) diff = n_device.compare_config() if diff: @@ -358,7 +361,6 @@ def napalm_configure_confirmed( if api_settings.COMMIT_CONFIRMED_MODE == 2: if isinstance(n_device, NapalmEOSDriver): mode_2_supported = True - n_device.session_config = "job{}".format(job_id) else: logger.warn( f"commit_confirmed_mode is set to 2, but it's unsupported for device OS '{task.host.platform}'. 
" @@ -375,11 +377,11 @@ def napalm_configure_confirmed( return Result(host=task.host, diff=diff, changed=len(diff) > 0) -def napalm_confirm_commit(task, job_id: int = 0): +def napalm_confirm_commit(task, prev_job_id: int = 0): """Confirm a previous pending configure session""" - n_device = task.host.get_connection("napalm") + n_device = task.host.get_connection("napalm", task.nornir.config) if isinstance(n_device, NapalmEOSDriver): - n_device.session_config = "job{}".format(job_id) + n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() @@ -574,6 +576,98 @@ def confcheck_devices(hostnames: List[str], job_id=None): raise Exception("Configuration hash check failed for {}".format(" ".join(nrresult.failed_hosts.keys()))) +def select_devices( + nr: Nornir, + hostnames: Optional[List[str]] = None, + device_type: Optional[str] = None, + group: Optional[str] = None, + resync: bool = False, + **kwargs, +) -> Tuple[Nornir, int, List[str]]: + """Get device selection for devices to synchronize. 
+ + Returns: + Nornir: A filtered Nornir object based on the input arg nr + int: A count of number of devices selected + List[str]: A list of hostnames that will be skipped from the initial nr object + """ + logger = get_logger() + if hostnames: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, hostname=hostnames) + else: + if device_type: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, device_type=device_type) + elif group: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, group=group) + else: + # all devices + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync) + + if skipped_hostnames: + logger.info( + "Device(s) already synchronized, skipping ({}): {}".format( + len(skipped_hostnames), ", ".join(skipped_hostnames) + ) + ) + return nr_filtered, dev_count, skipped_hostnames + + +@job_wrapper +def confirm_devices( + prev_job_id: int, + hostnames: List[str], + job_id: Optional[int] = None, + scheduled_by: Optional[str] = None, + resync: bool = False, +) -> NornirJobResult: + logger = get_logger() + nr = cnaas_init() + + nr_filtered, dev_count, skipped_hostnames = select_devices(nr, hostnames, resync) + + device_list = list(nr_filtered.inventory.hosts.keys()) + logger.info("Device(s) selected for commit-confirm ({}): {}".format(dev_count, ", ".join(device_list))) + + try: + nrresult = nr_filtered.run(task=napalm_confirm_commit, prev_job_id=prev_job_id) + except Exception as e: + logger.exception("Exception while confirm-commit devices: {}".format(str(e))) + try: + with sqla_session() as session: + logger.info( + "Releasing lock for devices from syncto job: {} (in commit-job {})".format(prev_job_id, job_id) + ) + Joblock.release_lock(session, job_id=prev_job_id) + except Exception: + logger.error("Unable to release devices lock after syncto job") + return NornirJobResult(nrresult=nrresult) + + failed_hosts = list(nrresult.failed_hosts.keys()) + for 
hostname in failed_hosts: + logger.error("Commit-confirm failed for device '{}'".format(hostname)) + + # mark synced, remove mark sync and release job from sync_devices. break into functions? + if nrresult.failed: + logger.error("Not all devices were successfully commit-confirmed") + + with sqla_session() as session: + for host, results in nrresult.items(): + if host in failed_hosts or len(results) != 1: + logger.debug("Setting device as unsync for failed commit-confirm on device {}".format(host)) + dev: Device = session.query(Device).filter(Device.hostname == host).one() + dev.synchronized = False + dev.last_seen = datetime.datetime.utcnow() + else: + dev: Device = session.query(Device).filter(Device.hostname == host).one() + dev.synchronized = True + dev.last_seen = datetime.datetime.utcnow() + + logger.info("Releasing lock for devices from syncto job: {} (in commit-job {})".format(prev_job_id, job_id)) + Joblock.release_lock(session, job_id=prev_job_id) + + return NornirJobResult(nrresult=nrresult) + + @job_wrapper def sync_devices( hostnames: Optional[List[str]] = None, @@ -608,25 +702,7 @@ def sync_devices( """ logger = get_logger() nr = cnaas_init() - dev_count = 0 - skipped_hostnames = [] - if hostnames: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, hostname=hostnames) - else: - if device_type: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, device_type=device_type) - elif group: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, group=group) - else: - # all devices - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync) - - if skipped_hostnames: - logger.info( - "Device(s) already synchronized, skipping ({}): {}".format( - len(skipped_hostnames), ", ".join(skipped_hostnames) - ) - ) + nr_filtered, dev_count, skipped_hostnames = select_devices(nr, hostnames, device_type, group, resync) device_list = list(nr_filtered.inventory.hosts.keys()) 
logger.info("Device(s) selected for synchronization ({}): {}".format(dev_count, ", ".join(device_list))) @@ -689,6 +765,7 @@ def sync_devices( change_scores.append(0) logger.debug("Empty diff for host {}, 0 change score".format(host)) + # break into separate function? nr_confighash = None if dry_run and force: # update config hash for devices that had an empty diff because local @@ -729,7 +806,8 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = False dev.last_seen = datetime.datetime.utcnow() - else: + # if next job will commit, that job will mark synchronized on success + elif api_settings.COMMIT_CONFIRMED_MODE != 2: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() @@ -737,7 +815,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() - if not dry_run: + if not dry_run and api_settings.COMMIT_CONFIRMED_MODE != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) @@ -772,6 +850,20 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) + elif api_settings.COMMIT_CONFIRMED_MODE == 2: + if not changed_hosts: + logger.info("None of the selected host has any changes (diff), skipping commit-confirm") + logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) + Joblock.release_lock(session, job_id=job_id) + else: + scheduler = Scheduler() + next_job_id = scheduler.add_onetime_job( + 
"cnaas_nms.devicehandler.sync_devices:confirm_devices", + when=0, + scheduled_by=scheduled_by, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, + ) + logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id, change_score=total_change_score) From 784c5c8bd7849ce277786c654017984ef8eef9fd Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 16 Jan 2023 11:31:01 +0100 Subject: [PATCH 006/169] let nornir handle connection open/close when doing commit_confirm_mode 2 instead of closing connection immediately. warn if syncing more than 50 devices, connection timeouts might cause issues? --- src/cnaas_nms/devicehandler/sync_devices.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index f78cff32..f3018106 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -445,7 +445,8 @@ def push_sync_device( "Synchronize device config for host: {} ({}:{})".format(task.host.name, task.host.hostname, task.host.port) ) - task.host.open_connection("napalm", configuration=task.nornir.config) + if api_settings.COMMIT_CONFIRMED_MODE != 2: + task.host.open_connection("napalm", configuration=task.nornir.config) task_args = { "name": "Sync device config", "replace": True, @@ -461,7 +462,8 @@ def push_sync_device( task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id task.run(**task_args) - task.host.close_connection("napalm") + if api_settings.COMMIT_CONFIRMED_MODE != 2: + task.host.close_connection("napalm") if task.results[1].diff: config = task.results[1].host["config"] @@ -609,6 +611,10 @@ def select_devices( len(skipped_hostnames), ", ".join(skipped_hostnames) ) ) + + if dev_count > 50 and api_settings.COMMIT_CONFIRMED_MODE == 2: + logger.warning("commit_confirmed_mode 2 might not be 
reliable for syncs of more than 50 devices") + return nr_filtered, dev_count, skipped_hostnames From 9fbcd7aeaca9336c3b30928af8ad8df5b66bcbe7 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 10:32:02 +0100 Subject: [PATCH 007/169] allow overriding of settings for testing purpose. add tests for syncto commitconfirm mode 1 --- src/cnaas_nms/app_settings.py | 2 + src/cnaas_nms/devicehandler/sync_devices.py | 8 ++- .../devicehandler/tests/test_syncto.py | 54 +++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/cnaas_nms/devicehandler/tests/test_syncto.py diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 5b7404f7..65904c9e 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -51,6 +51,7 @@ class ApiSettings(BaseSettings): MGMTDOMAIN_RESERVED_COUNT: int = 5 COMMIT_CONFIRMED_MODE: int = 1 COMMIT_CONFIRMED_TIMEOUT: int = 300 + SETTINGS_OVERRIDE: Optional[dict] = None def construct_api_settings() -> ApiSettings: @@ -80,6 +81,7 @@ def construct_api_settings() -> ApiSettings: MGMTDOMAIN_RESERVED_COUNT=config.get("mgmtdomain_reserved_count", 5), COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), + SETTINGS_OVERRIDE=config.get("settings_override", None), ) else: return ApiSettings() diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index f3018106..33e05c57 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -337,11 +337,15 @@ def populate_device_vars( # the list of configuration variables. The idea is to store secret # configuration outside of the templates repository. 
template_secrets = get_environment_secrets() + # For testing purposes allow overriding of settings instead of requiring git updates + override_dict = {} + if api_settings.SETTINGS_OVERRIDE and isinstance(api_settings.SETTINGS_OVERRIDE, dict): + override_dict = api_settings.SETTINGS_OVERRIDE # Merge all dicts with variables into one, later row overrides # Device variables override any names from settings, for example the # interfaces list from settings are replaced with an interface list from # device variables that contains more information - device_variables = {**settings, **device_variables, **template_secrets} + device_variables = {**settings, **device_variables, **template_secrets, **override_dict} return device_variables @@ -856,7 +860,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) - elif api_settings.COMMIT_CONFIRMED_MODE == 2: + elif api_settings.COMMIT_CONFIRMED_MODE == 2 and not dry_run: if not changed_hosts: logger.info("None of the selected host has any changes (diff), skipping commit-confirm") logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py new file mode 100644 index 00000000..a4519e40 --- /dev/null +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -0,0 +1,54 @@ +import os +import time +from typing import Optional + +import pkg_resources +import pytest +import yaml + +from cnaas_nms.db.job import Job, JobStatus +from cnaas_nms.db.session import sqla_session +from cnaas_nms.db.settings import api_settings +from cnaas_nms.devicehandler.sync_devices import sync_devices +from cnaas_nms.scheduler.scheduler import Scheduler + + +@pytest.fixture +def testdata(scope="session"): + data_dir = pkg_resources.resource_filename(__name__, 
"data") + with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: + return yaml.safe_load(f_testdata) + + +@pytest.fixture +def scheduler(scope="session"): + scheduler = Scheduler() + scheduler.start() + return scheduler + + +@pytest.mark.integration +def test_syncto_commitmode_1(testdata, scheduler): + api_settings.COMMIT_CONFIRMED_MODE = 1 + api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} + job_id = scheduler.add_onetime_job( + sync_devices, + when=0, + scheduled_by="test_user", + kwargs={ + "hostnames": ["eosdist1"], + }, + ) + job_res: Optional[Job] = None + job_dict: Optional[dict] = None + with sqla_session() as session: + time.sleep(2) + for i in range(1, 5): + if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: + job_res = session.query(Job).filter(Job.id == job_id).one() + job_dict = job_res.as_dict() + else: + break + breakpoint() + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From d1a2e8f1e9b0041391228774c852b0c1bb320070 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 10:54:38 +0100 Subject: [PATCH 008/169] fix scheduler state check for integrationtests --- src/cnaas_nms/api/interface.py | 1 - src/cnaas_nms/devicehandler/tests/test_syncto.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/api/interface.py b/src/cnaas_nms/api/interface.py index c7665a45..cbbb0a84 100644 --- a/src/cnaas_nms/api/interface.py +++ b/src/cnaas_nms/api/interface.py @@ -222,7 +222,6 @@ def put(self, hostname): elif "data" in if_dict and not if_dict["data"]: intfdata = None - if intfdata != intfdata_original: intf.data = intfdata updated = True diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index a4519e40..1fd2b025 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ 
b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -5,6 +5,7 @@ import pkg_resources import pytest import yaml +from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -23,7 +24,8 @@ def testdata(scope="session"): @pytest.fixture def scheduler(scope="session"): scheduler = Scheduler() - scheduler.start() + if scheduler.get_scheduler().state == STATE_STOPPED: + scheduler.start() return scheduler @@ -49,6 +51,5 @@ def test_syncto_commitmode_1(testdata, scheduler): job_dict = job_res.as_dict() else: break - breakpoint() assert job_dict["status"] == "FINISHED" assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From e2b5f9065fb070dfafb816c2b70e7d9fffb99a54 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 11:03:46 +0100 Subject: [PATCH 009/169] fix scheduler state check for integrationtests --- src/cnaas_nms/scheduler/tests/test_scheduler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/scheduler/tests/test_scheduler.py b/src/cnaas_nms/scheduler/tests/test_scheduler.py index 740e4291..96d35404 100644 --- a/src/cnaas_nms/scheduler/tests/test_scheduler.py +++ b/src/cnaas_nms/scheduler/tests/test_scheduler.py @@ -5,6 +5,7 @@ import pkg_resources import pytest import yaml +from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -35,7 +36,8 @@ def requirements(self, postgresql): @classmethod def setUpClass(cls) -> None: scheduler = Scheduler() - scheduler.start() + if scheduler.get_scheduler().state == STATE_STOPPED: + scheduler.start() @classmethod def tearDownClass(cls) -> None: From 1105fbd9f1e7837019c9895bd35ff35fe91572a4 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 11:25:26 +0100 Subject: [PATCH 010/169] mark equipment instead of integrationtest for github actions --- 
src/cnaas_nms/devicehandler/tests/test_syncto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 1fd2b025..a7d1be08 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -29,7 +29,7 @@ def scheduler(scope="session"): return scheduler -@pytest.mark.integration +@pytest.mark.equipment def test_syncto_commitmode_1(testdata, scheduler): api_settings.COMMIT_CONFIRMED_MODE = 1 api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} From f0918895ee1f4143432735ae54dcdc076f03c93e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 17:03:17 +0100 Subject: [PATCH 011/169] run syncto with mode 0, 1 and 2. check captured logging data for correct mode --- src/cnaas_nms/devicehandler/sync_devices.py | 5 +++ .../devicehandler/tests/data/testdata.yml | 4 ++ .../devicehandler/tests/test_syncto.py | 43 ++++++++++++++++--- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 33e05c57..9d200de8 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -465,6 +465,11 @@ def push_sync_device( else: task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id + logger.debug( + "Commit confirm mode for host {}: {} (dry_run: {})".format( + task.host.name, api_settings.COMMIT_CONFIRMED_MODE, dry_run + ) + ) task.run(**task_args) if api_settings.COMMIT_CONFIRMED_MODE != 2: task.host.close_connection("napalm") diff --git a/src/cnaas_nms/devicehandler/tests/data/testdata.yml b/src/cnaas_nms/devicehandler/tests/data/testdata.yml index 16a82fcf..0d4c6ee2 100644 --- a/src/cnaas_nms/devicehandler/tests/data/testdata.yml +++ b/src/cnaas_nms/devicehandler/tests/data/testdata.yml @@ -85,3 
+85,7 @@ linknets_mlag_nonpeers: ipv4_network: null redundant_link: true site_id: null +syncto_device_hostnames: + - "eosdist1" +syncto_settings_override: + cli_append_str: "interface Management1\ndescription test" diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index a7d1be08..7bbdee69 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -1,3 +1,4 @@ +import logging import os import time from typing import Optional @@ -29,16 +30,13 @@ def scheduler(scope="session"): return scheduler -@pytest.mark.equipment -def test_syncto_commitmode_1(testdata, scheduler): - api_settings.COMMIT_CONFIRMED_MODE = 1 - api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} +def run_syncto_job(scheduler, testdata: dict) -> Optional[dict]: job_id = scheduler.add_onetime_job( sync_devices, when=0, scheduled_by="test_user", kwargs={ - "hostnames": ["eosdist1"], + "hostnames": testdata["syncto_device_hostnames"], }, ) job_res: Optional[Job] = None @@ -51,5 +49,40 @@ def test_syncto_commitmode_1(testdata, scheduler): job_dict = job_res.as_dict() else: break + return job_dict + + +@pytest.mark.equipment +def test_syncto_commitmode_0(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + api_settings.COMMIT_CONFIRMED_MODE = 0 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 0" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + + +@pytest.mark.equipment +def test_syncto_commitmode_1(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + 
api_settings.COMMIT_CONFIRMED_MODE = 1 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 1" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + + +@pytest.mark.equipment +def test_syncto_commitmode_2(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + api_settings.COMMIT_CONFIRMED_MODE = 2 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 2" in caplog.text assert job_dict["status"] == "FINISHED" assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From f1f0669737e78a1400eba454374c80119ecca134 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 19 Jan 2023 10:32:33 +0100 Subject: [PATCH 012/169] Ensure SQL schema is updated before test run The PostgreSQL test database is usually initialized through some SQL fixture file. This fixture doesn't necessarily include all the latest schema changes. This ensures that once the test PostgreSQL database is up and running, alembic is used to apply all the available migrations up to the head revision. 
--- src/conftest.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/conftest.py b/src/conftest.py index 8b166bdd..abd55c5d 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -1,10 +1,10 @@ import os import socket +import subprocess import time from contextlib import closing import pytest - from git import Repo @@ -71,9 +71,16 @@ def postgresql(request): # A more complete solution would check that we can actually establish a PostgreSQL client # connection. time.sleep(5) + request.getfixturevalue("alembic_upgrade") yield True +@pytest.fixture(scope="session") +def alembic_upgrade(pytestconfig): + """Ensures the sql database schema is up-to-date at the start of a test run""" + subprocess.check_call(["alembic", "upgrade", "head"], cwd=pytestconfig.rootpath) + + def wait_for_port(host: str, port: int, tries=10) -> bool: """Waits for TCP port to receive connections""" for retry in range(tries): From 3a9dec25d126f5be76bf05042845ed40f609d3a2 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 19 Jan 2023 16:37:50 +0100 Subject: [PATCH 013/169] try to get code coverage for the confirm_devices function --- .../devicehandler/tests/test_syncto.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 7bbdee69..b2df9c99 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -30,13 +30,14 @@ def scheduler(scope="session"): return scheduler -def run_syncto_job(scheduler, testdata: dict) -> Optional[dict]: +def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: job_id = scheduler.add_onetime_job( sync_devices, when=0, scheduled_by="test_user", kwargs={ "hostnames": testdata["syncto_device_hostnames"], + "dry_run": dry_run, }, ) job_res: Optional[Job] = None @@ -61,7 +62,7 @@ def 
test_syncto_commitmode_0(testdata, scheduler, settings_directory, templates_ hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 0" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False @pytest.mark.equipment @@ -73,7 +74,7 @@ def test_syncto_commitmode_1(testdata, scheduler, settings_directory, templates_ hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 1" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False @pytest.mark.equipment @@ -81,8 +82,16 @@ def test_syncto_commitmode_2(testdata, scheduler, settings_directory, templates_ api_settings.COMMIT_CONFIRMED_MODE = 2 api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] with caplog.at_level(logging.DEBUG): - job_dict = run_syncto_job(scheduler, testdata) + job_dict = run_syncto_job(scheduler, testdata, dry_run=False) hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 2" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False + + # Revert change + api_settings.SETTINGS_OVERRIDE = None + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata, dry_run=False) + assert "selected for commit-confirm" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"][hostname]["failed"] is False From 6b3d3629fab9bf817de4ea0aad87ec8b9b2ec28e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 09:24:14 +0100 Subject: [PATCH 014/169] longer timeout since sync doesn't finish on kvm 
vm --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index b2df9c99..f4a2e159 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -44,7 +44,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None with sqla_session() as session: time.sleep(2) - for i in range(1, 5): + for i in range(1, 15): if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: job_res = session.query(Job).filter(Job.id == job_id).one() job_dict = job_res.as_dict() From 63c3f69810a2a0c8dd75e71ed88a9cf8bea665ae Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 14:43:26 +0100 Subject: [PATCH 015/169] resync=True to get device to new config with settings_override even if already synced --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index f4a2e159..dc1c7ec3 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -38,6 +38,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ kwargs={ "hostnames": testdata["syncto_device_hostnames"], "dry_run": dry_run, + "resync": True, }, ) job_res: Optional[Job] = None From 1eec6bd0b5c75e2cd373b83c57eabfa69db3a78c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 17:39:22 +0100 Subject: [PATCH 016/169] reattempt acquire joblock, sometimes unittest can fail otherwise --- src/cnaas_nms/devicehandler/sync_devices.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py 
b/src/cnaas_nms/devicehandler/sync_devices.py index 9d200de8..cd246282 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -1,5 +1,6 @@ import datetime import os +import time from hashlib import sha256 from ipaddress import IPv4Address, IPv4Interface from typing import List, Optional, Tuple @@ -739,7 +740,15 @@ def sync_devices( if not dry_run: with sqla_session() as session: logger.info("Trying to acquire lock for devices to run syncto job: {}".format(job_id)) - if not Joblock.acquire_lock(session, name="devices", job_id=job_id): + max_attempts = 5 + lock_ok: bool = False + for i in range(max_attempts): + lock_ok = Joblock.acquire_lock(session, name="devices", job_id=job_id) + if lock_ok: + break + else: + time.sleep(2) + if not lock_ok: raise JoblockError("Unable to acquire lock for configuring devices") try: From a32d84fba5c0e337748b7ec42e298cd9fb018760 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 17:40:37 +0100 Subject: [PATCH 017/169] syncto tests: sleep in between getting jobstatus. wait for next_job to finish for commit confirm mode 2. 
--- .../devicehandler/tests/test_syncto.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index dc1c7ec3..b33925f4 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -43,13 +43,24 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ ) job_res: Optional[Job] = None job_dict: Optional[dict] = None + jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - time.sleep(2) - for i in range(1, 15): - if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: - job_res = session.query(Job).filter(Job.id == job_id).one() + for i in range(1, 30): + time.sleep(1) + if not job_res or job_res.status in jobstatus_wait: + job_res: Job = session.query(Job).filter(Job.id == job_id).one() job_dict = job_res.as_dict() + # if next_job_id scheduled for confirm action, wait for that also + if job_res.next_job_id: + next_job_res = Optional[Job] = None + for j in range(1, 30): + time.sleep(1) + if not next_job_res or next_job_res.status in jobstatus_wait: + next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + else: + break else: + print("test run_syncto_job run {} status {}".format(i, JobStatus.name)) break return job_dict From b7af7eb2a4d842b1823dc362de2517ed5b9481f2 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 24 Jan 2023 16:00:14 +0100 Subject: [PATCH 018/169] more verbose logging for pytest. 
test super-long timeout for config test --- docker/api/pytest.sh | 2 +- src/cnaas_nms/devicehandler/tests/test_syncto.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/api/pytest.sh b/docker/api/pytest.sh index fef7da3f..19c034fa 100755 --- a/docker/api/pytest.sh +++ b/docker/api/pytest.sh @@ -1,6 +1,6 @@ #!/bin/bash -PYTESTARGS=() +PYTESTARGS=("-vv" "--showlocals") if [ ! -z "$NO_EQUIPMENTTEST" ] ; then PYTESTARGS+=("-m" "not equipment") diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index b33925f4..bcda4d6d 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -45,7 +45,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - for i in range(1, 30): + for i in range(1, 300): time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() @@ -53,7 +53,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: next_job_res = Optional[Job] = None - for j in range(1, 30): + for j in range(1, 300): time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() From 295e15eb09f87abcf34f34012249753c1b7a191c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 09:35:26 +0100 Subject: [PATCH 019/169] cleanup after scheduler fixture. 
print jobstatus from test if not == finished --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index bcda4d6d..eb103195 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -13,6 +13,7 @@ from cnaas_nms.db.settings import api_settings from cnaas_nms.devicehandler.sync_devices import sync_devices from cnaas_nms.scheduler.scheduler import Scheduler +from cnaas_nms.tools.log import get_logger @pytest.fixture @@ -27,10 +28,14 @@ def scheduler(scope="session"): scheduler = Scheduler() if scheduler.get_scheduler().state == STATE_STOPPED: scheduler.start() - return scheduler + yield scheduler + time.sleep(3) + scheduler.get_scheduler().print_jobs() + scheduler.shutdown() def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: + logger = get_logger() job_id = scheduler.add_onetime_job( sync_devices, when=0, @@ -45,7 +50,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - for i in range(1, 300): + for i in range(1, 30): time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() @@ -53,15 +58,16 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: next_job_res = Optional[Job] = None - for j in range(1, 300): + for j in range(1, 30): time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() else: break else: - print("test run_syncto_job run {} status 
{}".format(i, JobStatus.name)) break + if job_dict["status"] != "FINISHED": + logger.debug("test run_syncto_job job status '{}': {}".format(job_dict["status"], job_dict)) return job_dict From bf1ecde33dbaf61283f7b54924f45c671fa26db5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 14:24:44 +0100 Subject: [PATCH 020/169] make sure to refresh session objects between polls --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index eb103195..d0923573 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -6,7 +6,6 @@ import pkg_resources import pytest import yaml -from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -24,10 +23,9 @@ def testdata(scope="session"): @pytest.fixture -def scheduler(scope="session"): +def scheduler(scope="module"): scheduler = Scheduler() - if scheduler.get_scheduler().state == STATE_STOPPED: - scheduler.start() + scheduler.start() yield scheduler time.sleep(3) scheduler.get_scheduler().print_jobs() @@ -54,6 +52,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() + session.refresh(job_res) job_dict = job_res.as_dict() # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: @@ -62,6 +61,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + session.refresh(next_job_res) else: break else: From 
a3a5fc35cb3f89ab6465577e7bd08e6471e1cb4e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 16:35:59 +0100 Subject: [PATCH 021/169] add scheduler fixture to global conftest for pytest, and make both syncto and test scheduler use that one with scope=session to only have one scheduler. rework test_scheduler to pytest format. --- .../devicehandler/tests/test_syncto.py | 13 +-- .../scheduler/tests/test_scheduler.py | 104 ++++++------------ src/conftest.py | 11 ++ 3 files changed, 47 insertions(+), 81 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index d0923573..0d7b9b46 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -11,27 +11,16 @@ from cnaas_nms.db.session import sqla_session from cnaas_nms.db.settings import api_settings from cnaas_nms.devicehandler.sync_devices import sync_devices -from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.tools.log import get_logger @pytest.fixture -def testdata(scope="session"): +def testdata(scope="module"): data_dir = pkg_resources.resource_filename(__name__, "data") with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: return yaml.safe_load(f_testdata) -@pytest.fixture -def scheduler(scope="module"): - scheduler = Scheduler() - scheduler.start() - yield scheduler - time.sleep(3) - scheduler.get_scheduler().print_jobs() - scheduler.shutdown() - - def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: logger = get_logger() job_id = scheduler.add_onetime_job( diff --git a/src/cnaas_nms/scheduler/tests/test_scheduler.py b/src/cnaas_nms/scheduler/tests/test_scheduler.py index 96d35404..0b9b78bd 100644 --- a/src/cnaas_nms/scheduler/tests/test_scheduler.py +++ b/src/cnaas_nms/scheduler/tests/test_scheduler.py @@ -1,16 +1,10 @@ -import os import time -import unittest -import pkg_resources import pytest -import 
yaml -from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session from cnaas_nms.scheduler.jobresult import DictJobResult -from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.scheduler.wrapper import job_wrapper @@ -27,68 +21,40 @@ def job_testfunc_exception(text="", job_id=None, scheduled_by=None): @pytest.mark.integration -class InitTests(unittest.TestCase): - @pytest.fixture(autouse=True) - def requirements(self, postgresql): - """Ensures the required pytest fixtures are loaded implicitly for all these tests""" - pass +def test_add_schedule(postgresql, scheduler): + job1_id = scheduler.add_onetime_job( + job_testfunc_success, when=1, scheduled_by="test_user", kwargs={"text": "success"} + ) + job2_id = scheduler.add_onetime_job( + job_testfunc_exception, when=1, scheduled_by="test_user", kwargs={"text": "exception"} + ) + assert isinstance(job1_id, int) + assert isinstance(job2_id, int) + print(f"Test job 1 scheduled as ID { job1_id }") + print(f"Test job 2 scheduled as ID { job2_id }") + time.sleep(3) + with sqla_session() as session: + job1 = session.query(Job).filter(Job.id == job1_id).one_or_none() + assert isinstance(job1, Job), "Test job 1 could not be found" + assert job1.status == JobStatus.FINISHED, "Test job 1 did not finish" + assert job1.result == {"status": "success"}, "Test job 1 returned bad status" + job2 = session.query(Job).filter(Job.id == job2_id).one_or_none() + assert isinstance(job2, Job), "Test job 2 could not be found" + assert job2.status == JobStatus.EXCEPTION, "Test job 2 did not make exception" + assert "message" in job2.exception, "Test job 2 did not contain message in exception" - @classmethod - def setUpClass(cls) -> None: - scheduler = Scheduler() - if scheduler.get_scheduler().state == STATE_STOPPED: - scheduler.start() - @classmethod - def tearDownClass(cls) -> None: - scheduler = Scheduler() - time.sleep(3) - 
scheduler.get_scheduler().print_jobs() - scheduler.shutdown() - - def setUp(self): - data_dir = pkg_resources.resource_filename(__name__, "data") - with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: - self.testdata = yaml.safe_load(f_testdata) - - def test_add_schedule(self): - scheduler = Scheduler() - job1_id = scheduler.add_onetime_job( - job_testfunc_success, when=1, scheduled_by="test_user", kwargs={"text": "success"} - ) - job2_id = scheduler.add_onetime_job( - job_testfunc_exception, when=1, scheduled_by="test_user", kwargs={"text": "exception"} - ) - assert isinstance(job1_id, int) - assert isinstance(job2_id, int) - print(f"Test job 1 scheduled as ID { job1_id }") - print(f"Test job 2 scheduled as ID { job2_id }") - time.sleep(3) - with sqla_session() as session: - job1 = session.query(Job).filter(Job.id == job1_id).one_or_none() - self.assertIsInstance(job1, Job, "Test job 1 could not be found") - self.assertEqual(job1.status, JobStatus.FINISHED, "Test job 1 did not finish") - self.assertEqual(job1.result, {"status": "success"}, "Test job 1 returned bad status") - job2 = session.query(Job).filter(Job.id == job2_id).one_or_none() - self.assertIsInstance(job2, Job, "Test job 2 could not be found") - self.assertEqual(job2.status, JobStatus.EXCEPTION, "Test job 2 did not make exception") - self.assertIn("message", job2.exception, "Test job 2 did not contain message in exception") - - def test_abort_schedule(self): - scheduler = Scheduler() - job3_id = scheduler.add_onetime_job( - job_testfunc_success, when=600, scheduled_by="test_user", kwargs={"text": "abort"} - ) - assert isinstance(job3_id, int) - print(f"Test job 3 scheduled as ID { job3_id }") - scheduler.remove_scheduled_job(job3_id) - time.sleep(3) - with sqla_session() as session: - job3 = session.query(Job).filter(Job.id == job3_id).one_or_none() - self.assertIsInstance(job3, Job, "Test job 3 could not be found") - self.assertEqual(job3.status, JobStatus.ABORTED, "Test job 3 did 
not abort") - self.assertEqual(job3.result, {"message": "removed"}, "Test job 3 returned bad status") - - -if __name__ == "__main__": - unittest.main() +@pytest.mark.integration +def test_abort_schedule(postgresql, scheduler): + job3_id = scheduler.add_onetime_job( + job_testfunc_success, when=600, scheduled_by="test_user", kwargs={"text": "abort"} + ) + assert isinstance(job3_id, int) + print(f"Test job 3 scheduled as ID { job3_id }") + scheduler.remove_scheduled_job(job3_id) + time.sleep(3) + with sqla_session() as session: + job3 = session.query(Job).filter(Job.id == job3_id).one_or_none() + assert isinstance(job3, Job), "Test job 3 could not be found" + assert job3.status == JobStatus.ABORTED, "Test job 3 did not abort" + assert job3.result == {"message": "removed"}, "Test job 3 returned bad status" diff --git a/src/conftest.py b/src/conftest.py index 8b166bdd..74b0ec7f 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -5,6 +5,7 @@ import pytest +from cnaas_nms.scheduler.scheduler import Scheduler from git import Repo @@ -85,3 +86,13 @@ def wait_for_port(host: str, port: int, tries=10) -> bool: time.sleep(0.5) print(f"NO RESPONSE from {host}:{port}") return False + + +@pytest.fixture +def scheduler(scope="session"): + scheduler = Scheduler() + scheduler.start() + yield scheduler + time.sleep(3) + scheduler.get_scheduler().print_jobs() + scheduler.shutdown() From 31c44ffde8ff48e76f1b2d7ea5e92db416c9ee93 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 26 Jan 2023 17:17:54 +0100 Subject: [PATCH 022/169] scope should be on decorator, not on function --- src/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/conftest.py b/src/conftest.py index 74b0ec7f..3ca89a2f 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -88,8 +88,8 @@ def wait_for_port(host: str, port: int, tries=10) -> bool: return False -@pytest.fixture -def scheduler(scope="session"): +@pytest.fixture(scope="session") +def scheduler(): scheduler = 
Scheduler() scheduler.start() yield scheduler From 981a5a949c0abc6b46ce8775674855345e553054 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 09:25:53 +0100 Subject: [PATCH 023/169] fix next_job_res typing, log bad status for confirm job as well --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 0d7b9b46..e627a9d2 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -45,18 +45,22 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict = job_res.as_dict() # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: - next_job_res = Optional[Job] = None + confirm_job_res: Optional[Job] = None + confirm_job_dict: Optional[dict] = None for j in range(1, 30): time.sleep(1) - if not next_job_res or next_job_res.status in jobstatus_wait: - next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() - session.refresh(next_job_res) + if not confirm_job_res or confirm_job_res.status in jobstatus_wait: + confirm_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + session.refresh(confirm_job_res) + confirm_job_dict = confirm_job_res.as_dict() else: break + if confirm_job_dict and confirm_job_dict["status"] != "FINISHED": + logger.warning("test run_syncto_job confirm job bad status: {}".format(confirm_job_dict)) else: break if job_dict["status"] != "FINISHED": - logger.debug("test run_syncto_job job status '{}': {}".format(job_dict["status"], job_dict)) + logger.warning("test run_syncto_job job bad status: {}".format(job_dict)) return job_dict From b3721b4d557f9b2304cf11b9f35bb0030a5d8c0a Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 13:40:28 +0100 Subject: [PATCH 024/169] 
support commit mode 2 for junos --- src/cnaas_nms/devicehandler/sync_devices.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index cd246282..e4cf10c7 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -7,6 +7,7 @@ import yaml from napalm.eos import EOSDriver as NapalmEOSDriver +from napalm.junos import JunOSDriver as NapalmJunOSDriver from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file @@ -364,7 +365,7 @@ def napalm_configure_confirmed( n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False if api_settings.COMMIT_CONFIRMED_MODE == 2: - if isinstance(n_device, NapalmEOSDriver): + if isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: logger.warn( @@ -388,6 +389,8 @@ def napalm_confirm_commit(task, prev_job_id: int = 0): if isinstance(n_device, NapalmEOSDriver): n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() + elif isinstance(n_device, NapalmJunOSDriver): + n_device.confirm_commit() def push_sync_device( From 8e6d969d08a90485b5234024d6b1ceb101e57597 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 13:45:20 +0100 Subject: [PATCH 025/169] make sure confirm job gets scheduled_by string --- src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index e4cf10c7..0b894dae 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -888,7 +888,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): "cnaas_nms.devicehandler.sync_devices:confirm_devices", when=0, 
scheduled_by=scheduled_by, - kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts, "scheduled_by": scheduled_by}, ) logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") From 1eb880dca32be6bf307efb90d3d7ddef794934c8 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:05:29 +0100 Subject: [PATCH 026/169] allow calling syncto with confirm_mode_override to override setting specified in yaml --- src/cnaas_nms/devicehandler/sync_devices.py | 44 +++++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0b894dae..9f5e7c50 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -351,8 +351,24 @@ def populate_device_vars( return device_variables +def get_confirm_mode(confirm_mode_override: Optional[int] = None) -> int: + valid_modes = [0, 1, 2] + if confirm_mode_override and confirm_mode_override in valid_modes: + return confirm_mode_override + elif api_settings.COMMIT_CONFIRMED_MODE and api_settings.COMMIT_CONFIRMED_MODE in valid_modes: + return api_settings.COMMIT_CONFIRMED_MODE + else: + return 1 + + def napalm_configure_confirmed( - task, dry_run=None, configuration=None, replace=None, commit_message: str = "", job_id: int = 0 + task, + dry_run=None, + configuration=None, + replace=None, + commit_message: str = "", + job_id: int = 0, + commit_confirm_override: Optional[int] = None, ): """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() @@ -364,7 +380,7 @@ def napalm_configure_confirmed( if diff: n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False - if api_settings.COMMIT_CONFIRMED_MODE == 2: + if get_confirm_mode(commit_confirm_override) == 2: if 
isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: @@ -373,7 +389,7 @@ def napalm_configure_confirmed( f"Falling back to mode 1 for device: {task.host.name}." ) - if api_settings.COMMIT_CONFIRMED_MODE == 1 or not mode_2_supported: + if get_confirm_mode(commit_confirm_override) == 1 or not mode_2_supported: if n_device.has_pending_commit(): n_device.confirm_commit() else: @@ -399,6 +415,7 @@ def push_sync_device( generate_only: bool = False, job_id: Optional[str] = None, scheduled_by: Optional[str] = None, + confirm_mode_override: Optional[int] = None, ): """ Nornir task to generate config and push to device @@ -408,7 +425,9 @@ def push_sync_device( dry_run: Don't commit config to device, just do compare/diff generate_only: Only generate text config, don't try to commit or even do dry_run compare to running config - + job_id: Job ID integer + scheduled_by: username of users that scheduled job + confirm_mode_override: integer to specify commit confirm mode Returns: """ @@ -469,6 +488,7 @@ def push_sync_device( else: task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id + task_args["confirm_mode_override"] = confirm_mode_override logger.debug( "Commit confirm mode for host {}: {} (dry_run: {})".format( task.host.name, api_settings.COMMIT_CONFIRMED_MODE, dry_run @@ -698,6 +718,7 @@ def sync_devices( job_id: Optional[int] = None, scheduled_by: Optional[str] = None, resync: bool = False, + confirm_mode_override: Optional[int] = None, ) -> NornirJobResult: """Synchronize devices to their respective templates. 
If no arguments are specified then synchronize all devices that are currently out @@ -715,6 +736,8 @@ def sync_devices( scheduled_by: Username from JWT resync: Re-synchronize a device even if it's marked as synced in the database, a device selected by hostname is always re-synced + confirm_mode_override: Override settings commit confirm mode, optional int + with value 0, 1 or 2 Returns: NornirJobResult @@ -755,7 +778,12 @@ def sync_devices( raise JoblockError("Unable to acquire lock for configuring devices") try: - nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run, job_id=job_id) + nrresult = nr_filtered.run( + task=push_sync_device, + dry_run=dry_run, + job_id=job_id, + confirm_mode_override=get_confirm_mode(confirm_mode_override), + ) except Exception as e: logger.exception("Exception while synchronizing devices: {}".format(str(e))) try: @@ -834,7 +862,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev.synchronized = False dev.last_seen = datetime.datetime.utcnow() # if next job will commit, that job will mark synchronized on success - elif api_settings.COMMIT_CONFIRMED_MODE != 2: + elif get_confirm_mode(confirm_mode_override) != 2: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() @@ -842,7 +870,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() - if not dry_run and api_settings.COMMIT_CONFIRMED_MODE != 2: + if not dry_run and get_confirm_mode(confirm_mode_override) != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) @@ -877,7 +905,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " 
f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}"
            )
-        elif api_settings.COMMIT_CONFIRMED_MODE == 2 and not dry_run:
+        elif get_confirm_mode(confirm_mode_override) == 2 and not dry_run:
             if not changed_hosts:
                 logger.info("None of the selected host has any changes (diff), skipping commit-confirm")
                 logger.info("Releasing lock for devices from syncto job: {}".format(job_id))

From 108ddebcf4f1cc5385a899e51138a1d9d8809dc7 Mon Sep 17 00:00:00 2001
From: Johan Marcusson
Date: Fri, 27 Jan 2023 14:43:46 +0100
Subject: [PATCH 027/169] add API parameter for confirm_mode

---
 src/cnaas_nms/api/device.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py
index a451c9b1..42cc7880 100644
--- a/src/cnaas_nms/api/device.py
+++ b/src/cnaas_nms/api/device.py
@@ -116,6 +116,7 @@
         "force": fields.Boolean(required=False),
         "auto_push": fields.Boolean(required=False),
         "resync": fields.Boolean(required=False),
+        "confirm_mode": fields.Integer(required=False),
     },
 )

@@ -623,6 +624,16 @@ def post(self):
             kwargs["job_comment"] = json_data["comment"]
         if "ticket_ref" in json_data and isinstance(json_data["ticket_ref"], str):
             kwargs["job_ticket_ref"] = json_data["ticket_ref"]
+        if "confirm_mode" in json_data and isinstance(json_data["confirm_mode"], int):
+            if 0 <= json_data["confirm_mode"] <= 2:
+                kwargs["confirm_mode_override"] = json_data["confirm_mode"]
+            else:
+                return (
+                    empty_result(
+                        status="error", data="If optional value confirm_mode is specified it must be an integer 0-2"
+                    ),
+                    400,
+                )

         total_count: Optional[int] = None
         nr = cnaas_init()

From 3f3e67a6d0263a436e4e0363a66eb6e01c87739d Mon Sep 17 00:00:00 2001
From: Johan Marcusson
Date: Fri, 27 Jan 2023 14:44:13 +0100
Subject: [PATCH 028/169] docs for confirm_mode in syncto API

---
 docs/apiref/syncto.rst | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/apiref/syncto.rst b/docs/apiref/syncto.rst
index d2a25675..f7e8b09e
100644 --- a/docs/apiref/syncto.rst +++ b/docs/apiref/syncto.rst @@ -36,6 +36,23 @@ The status success in this case only means that the job was scheduled successful you have to poll the job API to see that result of what was done, the job itself might still fail. +Configuration changes can be made in a way that requires a separate confirm call since version 1.5. +If the change can not be confirmed because the device is unreachable for example, the device +will roll back the configuration. Before version 1.5 this concept was not supported, but from this +version it's supported and enabled by default using mode 1. + +Commit confirm modes: + - 0 = No confirm commit (default up to version 1.4) + - 1 = Commit is immediately confirmed for each device when that device is configured + (default from version 1.5) + - 2 = Commit is confirmed after all devices in the job have been configured, but only if all were + successful. This mode is only supported for EOS and JunOS so far, and only supported for a small + number of devices per commit (max 50). If mode 2 is specified and an unsupported device is + selected that device will use mode 1 instead. + +Commit confirm mode can be specified in the configuration file, but it's also possible to override +that setting for a specific job using the API argument confirm_mode (see below). + Arguments: ---------- @@ -59,6 +76,8 @@ Arguments: This should be a string with max 255 characters. - ticket_ref: Optionally reference a service ticket associated with this job. This should be a string with max 32 characters. + - confirm_mode: Optionally override the default commit confirm mode (see above) for this job. + Must be an integer 0, 1 or 2 if specified. If neither hostname or device_type is specified all devices that needs to be sycnhronized will be selected. 
From a20567f39accefcad0ef8143355fa414a54bf139 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:50:34 +0100 Subject: [PATCH 029/169] docs for commit_confirm_mode and commit_confirmed_timeout settings --- docs/apiref/syncto.rst | 2 ++ docs/configuration/index.rst | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/docs/apiref/syncto.rst b/docs/apiref/syncto.rst index f7e8b09e..662266dc 100644 --- a/docs/apiref/syncto.rst +++ b/docs/apiref/syncto.rst @@ -41,6 +41,8 @@ If the change can not be confirmed because the device is not unreachable for exa will roll back the configuration. Before version 1.5 this concept was not supported, but from this version it's supported and enabled by default using mode 1. +.. _commit_confirm_modes: + Commit confirm modes: - 0 = No confirm commit (default up to version 1.4) - 1 = Commit is immediately confirmed for each device when that device is configured diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index e0531507..ac33be8b 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -38,6 +38,10 @@ Defines parameters for the API: each defined management domain when assigning new management IP addresses to devices. Defaults to 5 (e.g. meaning 10.0.0.1 through 10.0.0.5 would remain unassigned on a domain for 10.0.0.0/24). +- commit_confirmed_mode: Integer specifying default commit confirm mode + (see :ref:`commit_confirm_modes`). Defaults to 1. +- commit_confirmed_timeout: Time to wait before rolling back an unconfirmed commit, + specified in seconds. Defaults to 300. 
/etc/cnaas-nms/repository.yml ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 9c0c5779201de47ce71432d868192e3df57709b5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:51:46 +0100 Subject: [PATCH 030/169] default api.yml settings for commit_confirmed --- docker/api/config/api.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/api/config/api.yml b/docker/api/config/api.yml index ddaf0f39..40754df9 100644 --- a/docker/api/config/api.yml +++ b/docker/api/config/api.yml @@ -9,3 +9,5 @@ certpath: /tmp/devicecerts/ global_unique_vlans: True init_mgmt_timeout: 30 mgmtdomain_reserved_count: 5 +commit_confirmed_mode: 1 +commit_confirmed_timeout: 300 From 7ba1715044055dfe4504ce88d576d08ef56f5266 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 15:16:00 +0100 Subject: [PATCH 031/169] fix argument name confirm_mode_override --- src/cnaas_nms/devicehandler/sync_devices.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 9f5e7c50..62ae47d0 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -368,7 +368,7 @@ def napalm_configure_confirmed( replace=None, commit_message: str = "", job_id: int = 0, - commit_confirm_override: Optional[int] = None, + confirm_mode_override: Optional[int] = None, ): """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() @@ -380,7 +380,7 @@ def napalm_configure_confirmed( if diff: n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False - if get_confirm_mode(commit_confirm_override) == 2: + if get_confirm_mode(confirm_mode_override) == 2: if isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: @@ -389,7 +389,7 @@ def napalm_configure_confirmed( f"Falling back to mode 1 for device: 
{task.host.name}." ) - if get_confirm_mode(commit_confirm_override) == 1 or not mode_2_supported: + if get_confirm_mode(confirm_mode_override) == 1 or not mode_2_supported: if n_device.has_pending_commit(): n_device.confirm_commit() else: From 195c7d8e89f63e59205cb286a4a16cde01f3be43 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 15:20:21 +0100 Subject: [PATCH 032/169] log commit confirmed complete --- src/cnaas_nms/devicehandler/sync_devices.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 62ae47d0..5d1ae1ae 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -401,12 +401,14 @@ def napalm_configure_confirmed( def napalm_confirm_commit(task, prev_job_id: int = 0): """Confirm a previous pending configure session""" + logger = get_logger() n_device = task.host.get_connection("napalm", task.nornir.config) if isinstance(n_device, NapalmEOSDriver): n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() elif isinstance(n_device, NapalmJunOSDriver): n_device.confirm_commit() + logger.debug("Commit for job {} confirmed on device {}".format(prev_job_id, task.host.name)) def push_sync_device( From aece43aada4fdd5862f081a3efd0ea9f0c5c338c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 30 Jan 2023 09:55:31 +0100 Subject: [PATCH 033/169] break out post sync update confighash to separate function, and make sure confirm_devices job also runs it --- src/cnaas_nms/devicehandler/sync_devices.py | 94 +++++++++++++-------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 5d1ae1ae..805c2f6b 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -361,6 +361,52 @@ def get_confirm_mode(confirm_mode_override: 
Optional[int] = None) -> int: return 1 +def post_sync_update_cofighash( + dry_run: bool, force: bool, nr_filtered: Nornir, unchanged_hosts: List, failed_hosts: List +): + """Update configuration hashes for device that were configured after sync has completed. + Args: + dry_run: bool + force: bool + nr_filtered: Nornir inventory of hosts to run on + unchanged_hosts: List of hosts that has not been changed, don't update confhosh + failed_hosts: List of hosts that failed with change, don't update confhash + """ + logger = get_logger() + nr_confighash = None + if dry_run and force: + # update config hash for devices that had an empty diff because local + # changes on a device can cause reordering of CLI commands that results + # in config hash mismatch even if the calculated diff was empty + def include_filter(host, include_list=unchanged_hosts): + if host.name in include_list: + return True + else: + return False + + nr_confighash = nr_filtered.filter(filter_func=include_filter) + elif not dry_run: + # set new config hash for devices that was successfully updated + def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): + if host.name in exclude_list: + return False + else: + return True + + nr_confighash = nr_filtered.filter(filter_func=exclude_filter) + + if nr_confighash: + try: + nrresult_confighash = nr_confighash.run(task=update_config_hash) + except Exception as e: + logger.exception("Exception while updating config hashes: {}".format(str(e))) + else: + if nrresult_confighash.failed: + logger.error( + "Unable to update some config hashes: {}".format(list(nrresult_confighash.failed_hosts.keys())) + ) + + def napalm_configure_confirmed( task, dry_run=None, @@ -691,13 +737,17 @@ def confirm_devices( if nrresult.failed: logger.error("Not all devices were successfully commit-confirmed") + post_sync_update_cofighash( + dry_run=False, force=False, nr_filtered=nr_filtered, unchanged_hosts=[], failed_hosts=failed_hosts + ) + with sqla_session() as 
session: for host, results in nrresult.items(): if host in failed_hosts or len(results) != 1: logger.debug("Setting device as unsync for failed commit-confirm on device {}".format(host)) dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = False - dev.last_seen = datetime.datetime.utcnow() + dev.confhash = None else: dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = True @@ -822,39 +872,13 @@ def sync_devices( change_scores.append(0) logger.debug("Empty diff for host {}, 0 change score".format(host)) - # break into separate function? - nr_confighash = None - if dry_run and force: - # update config hash for devices that had an empty diff because local - # changes on a device can cause reordering of CLI commands that results - # in config hash mismatch even if the calculated diff was empty - def include_filter(host, include_list=unchanged_hosts): - if host.name in include_list: - return True - else: - return False - - nr_confighash = nr_filtered.filter(filter_func=include_filter) - elif not dry_run: - # set new config hash for devices that was successfully updated - def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): - if host.name in exclude_list: - return False - else: - return True - - nr_confighash = nr_filtered.filter(filter_func=exclude_filter) - - if nr_confighash: - try: - nrresult_confighash = nr_confighash.run(task=update_config_hash) - except Exception as e: - logger.exception("Exception while updating config hashes: {}".format(str(e))) - else: - if nrresult_confighash.failed: - logger.error( - "Unable to update some config hashes: {}".format(list(nrresult_confighash.failed_hosts.keys())) - ) + post_sync_update_cofighash( + dry_run=dry_run, + force=force, + nr_filtered=nr_filtered, + unchanged_hosts=unchanged_hosts, + failed_hosts=failed_hosts, + ) # set devices as synchronized if needed with sqla_session() as session: @@ -918,7 +942,7 @@ def 
exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): "cnaas_nms.devicehandler.sync_devices:confirm_devices", when=0, scheduled_by=scheduled_by, - kwargs={"prev_job_id": job_id, "hostnames": changed_hosts, "scheduled_by": scheduled_by}, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, ) logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") From dfa216c41697125c311caafcd57b960515133d14 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 09:44:16 +0100 Subject: [PATCH 034/169] rebase with black formating --- src/cnaas_nms/devicehandler/sync_devices.py | 29 ++++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index bb87ebd9..0d39f879 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -5,7 +5,7 @@ from typing import List, Optional import yaml -from nornir.core.task import MultiResult +from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get from nornir_utils.plugins.functions import print_result @@ -343,6 +343,21 @@ def populate_device_vars( return device_variables +def napalm_configure_confirmed(task, dry_run=None, configuration=None, replace=None): + n_device = task.host.get_connection("napalm") + n_device.load_replace_candidate(config=configuration) + diff = n_device.compare_config() + if diff: + n_device.commit_config(revert_in=300) + if n_device.has_pending_commit(): + n_device.confirm_commit() + else: + n_device.discard_config() + else: + n_device.discard_config() + return Result(host=task.host, diff=diff, changed=len(diff) > 0) + + def push_sync_device( task, dry_run: bool = True, @@ -404,12 +419,12 @@ def push_sync_device( ) task.host.open_connection("napalm", configuration=task.nornir.config) + if 
dry_run: + run_task = napalm_configure + else: + run_task = napalm_configure_confirmed task.run( - task=napalm_configure, - name="Sync device config", - replace=True, - configuration=task.host["config"], - dry_run=dry_run, + task=run_task, name="Sync device config", replace=True, configuration=task.host["config"], dry_run=dry_run ) task.host.close_connection("napalm") @@ -543,7 +558,7 @@ def sync_devices( of sync. Args: - hostname: Specify a single host by hostname to synchronize + hostnames: Specify a single host by hostname to synchronize device_type: Specify a device type to synchronize group: Specify a group of devices to synchronize dry_run: Don't commit generated config to device From 56f919746424aa1ec3e9e11a2b75878572f795d5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 15:24:59 +0100 Subject: [PATCH 035/169] Pass different args to napalm_config_X depending on what task function, so we can get job_id into the confirmed version --- src/cnaas_nms/devicehandler/sync_devices.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0d39f879..0f908ac1 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -11,7 +11,7 @@ from nornir_utils.plugins.functions import print_result import cnaas_nms.db.helper -from cnaas_nms.app_settings import app_settings +from cnaas_nms.app_settings import api_settings, app_settings from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.device_vars import expand_interface_settings from cnaas_nms.db.git import RepoStructureException @@ -419,13 +419,21 @@ def push_sync_device( ) task.host.open_connection("napalm", configuration=task.nornir.config) + task_args = { + "name": "Sync device config", + "replace": True, + "configuration": task.host["config"], + "dry_run": dry_run, + "commit_message": "Job id 
{}".format(job_id), + } if dry_run: - run_task = napalm_configure + task_args["task"] = napalm_configure + elif api_settings.COMMIT_CONFIRMED_MODE == 0: + task_args["task"] = napalm_configure else: - run_task = napalm_configure_confirmed - task.run( - task=run_task, name="Sync device config", replace=True, configuration=task.host["config"], dry_run=dry_run - ) + task_args["task"] = napalm_configure_confirmed + task_args["job_id"] = job_id + task.run(**task_args) task.host.close_connection("napalm") if task.results[1].diff: From bf986fe5e533e08becc898005edf9a30219acb36 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 10 Jan 2023 15:25:59 +0100 Subject: [PATCH 036/169] Begin work on commit confirmed with 3 modes, 0=no confirm, 1=per device confirm, 2=next job id with confirm (platform dependent?) --- src/cnaas_nms/app_settings.py | 4 +++ src/cnaas_nms/devicehandler/sync_devices.py | 39 +++++++++++++++++---- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 2dc48e6a..5b7404f7 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -49,6 +49,8 @@ class ApiSettings(BaseSettings): GLOBAL_UNIQUE_VLANS: bool = True INIT_MGMT_TIMEOUT: int = 30 MGMTDOMAIN_RESERVED_COUNT: int = 5 + COMMIT_CONFIRMED_MODE: int = 1 + COMMIT_CONFIRMED_TIMEOUT: int = 300 def construct_api_settings() -> ApiSettings: @@ -76,6 +78,8 @@ def construct_api_settings() -> ApiSettings: GLOBAL_UNIQUE_VLANS=config.get("global_unique_vlans", True), INIT_MGMT_TIMEOUT=config.get("init_mgmt_timeout", 30), MGMTDOMAIN_RESERVED_COUNT=config.get("mgmtdomain_reserved_count", 5), + COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), + COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), ) else: return ApiSettings() diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0f908ac1..c4494add 100644 --- 
a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -5,6 +5,7 @@ from typing import List, Optional import yaml +from napalm.eos import EOSDriver as NapalmEOSDriver from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get @@ -343,21 +344,45 @@ def populate_device_vars( return device_variables -def napalm_configure_confirmed(task, dry_run=None, configuration=None, replace=None): - n_device = task.host.get_connection("napalm") +def napalm_configure_confirmed( + task, dry_run=None, configuration=None, replace=None, commit_message: str = "", job_id: int = 0 +): + """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" + logger = get_logger() + n_device = task.host.get_connection("napalm", task.nornir.config) n_device.load_replace_candidate(config=configuration) diff = n_device.compare_config() if diff: - n_device.commit_config(revert_in=300) - if n_device.has_pending_commit(): - n_device.confirm_commit() - else: - n_device.discard_config() + n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) + mode_2_supported = False + if api_settings.COMMIT_CONFIRMED_MODE == 2: + if isinstance(n_device, NapalmEOSDriver): + mode_2_supported = True + n_device.session_config = "job{}".format(job_id) + else: + logger.warn( + f"commit_confirmed_mode is set to 2, but it's unsupported for device OS '{task.host.platform}'. " + f"Falling back to mode 1 for device: {task.host.name}." 
+ ) + + if api_settings.COMMIT_CONFIRMED_MODE == 1 or not mode_2_supported: + if n_device.has_pending_commit(): + n_device.confirm_commit() + else: + n_device.discard_config() else: n_device.discard_config() return Result(host=task.host, diff=diff, changed=len(diff) > 0) +def napalm_confirm_commit(task, job_id: int = 0): + """Confirm a previous pending configure session""" + n_device = task.host.get_connection("napalm") + if isinstance(n_device, NapalmEOSDriver): + n_device.session_config = "job{}".format(job_id) + n_device.confirm_commit() + + def push_sync_device( task, dry_run: bool = True, From a3f3ddded8cc6b7492a285881503cc721f71bb67 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 11 Jan 2023 15:49:35 +0100 Subject: [PATCH 037/169] break out parts of sync_device into separate function. add job function confirm_devices to run as second job after syncto, to confirm all devices. works with Arista if EOS-driver is changed to not discard changes at close() --- src/cnaas_nms/devicehandler/sync_devices.py | 144 ++++++++++++++++---- 1 file changed, 118 insertions(+), 26 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index c4494add..f78cff32 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -2,10 +2,11 @@ import os from hashlib import sha256 from ipaddress import IPv4Address, IPv4Interface -from typing import List, Optional +from typing import List, Optional, Tuple import yaml from napalm.eos import EOSDriver as NapalmEOSDriver +from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file from nornir_napalm.plugins.tasks import napalm_configure, napalm_get @@ -350,6 +351,8 @@ def napalm_configure_confirmed( """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() n_device = 
task.host.get_connection("napalm", task.nornir.config) + if isinstance(n_device, NapalmEOSDriver): + n_device.config_session = "job{}".format(job_id) n_device.load_replace_candidate(config=configuration) diff = n_device.compare_config() if diff: @@ -358,7 +361,6 @@ def napalm_configure_confirmed( if api_settings.COMMIT_CONFIRMED_MODE == 2: if isinstance(n_device, NapalmEOSDriver): mode_2_supported = True - n_device.session_config = "job{}".format(job_id) else: logger.warn( f"commit_confirmed_mode is set to 2, but it's unsupported for device OS '{task.host.platform}'. " @@ -375,11 +377,11 @@ def napalm_configure_confirmed( return Result(host=task.host, diff=diff, changed=len(diff) > 0) -def napalm_confirm_commit(task, job_id: int = 0): +def napalm_confirm_commit(task, prev_job_id: int = 0): """Confirm a previous pending configure session""" - n_device = task.host.get_connection("napalm") + n_device = task.host.get_connection("napalm", task.nornir.config) if isinstance(n_device, NapalmEOSDriver): - n_device.session_config = "job{}".format(job_id) + n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() @@ -574,6 +576,98 @@ def confcheck_devices(hostnames: List[str], job_id=None): raise Exception("Configuration hash check failed for {}".format(" ".join(nrresult.failed_hosts.keys()))) +def select_devices( + nr: Nornir, + hostnames: Optional[List[str]] = None, + device_type: Optional[str] = None, + group: Optional[str] = None, + resync: bool = False, + **kwargs, +) -> Tuple[Nornir, int, List[str]]: + """Get device selection for devices to synchronize. 
+ + Returns: + Nornir: A filtered Nornir object based on the input arg nr + int: A count of number of devices selected + List[str]: A list of hostnames that will be skipped from the initial nr object + """ + logger = get_logger() + if hostnames: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, hostname=hostnames) + else: + if device_type: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, device_type=device_type) + elif group: + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, group=group) + else: + # all devices + nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync) + + if skipped_hostnames: + logger.info( + "Device(s) already synchronized, skipping ({}): {}".format( + len(skipped_hostnames), ", ".join(skipped_hostnames) + ) + ) + return nr_filtered, dev_count, skipped_hostnames + + +@job_wrapper +def confirm_devices( + prev_job_id: int, + hostnames: List[str], + job_id: Optional[int] = None, + scheduled_by: Optional[str] = None, + resync: bool = False, +) -> NornirJobResult: + logger = get_logger() + nr = cnaas_init() + + nr_filtered, dev_count, skipped_hostnames = select_devices(nr, hostnames, resync) + + device_list = list(nr_filtered.inventory.hosts.keys()) + logger.info("Device(s) selected for commit-confirm ({}): {}".format(dev_count, ", ".join(device_list))) + + try: + nrresult = nr_filtered.run(task=napalm_confirm_commit, prev_job_id=prev_job_id) + except Exception as e: + logger.exception("Exception while confirm-commit devices: {}".format(str(e))) + try: + with sqla_session() as session: + logger.info( + "Releasing lock for devices from syncto job: {} (in commit-job {})".format(prev_job_id, job_id) + ) + Joblock.release_lock(session, job_id=prev_job_id) + except Exception: + logger.error("Unable to release devices lock after syncto job") + return NornirJobResult(nrresult=nrresult) + + failed_hosts = list(nrresult.failed_hosts.keys()) + for 
hostname in failed_hosts: + logger.error("Commit-confirm failed for device '{}'".format(hostname)) + + # mark synced, remove mark sync and release job from sync_devices. break into functions? + if nrresult.failed: + logger.error("Not all devices were successfully commit-confirmed") + + with sqla_session() as session: + for host, results in nrresult.items(): + if host in failed_hosts or len(results) != 1: + logger.debug("Setting device as unsync for failed commit-confirm on device {}".format(host)) + dev: Device = session.query(Device).filter(Device.hostname == host).one() + dev.synchronized = False + dev.last_seen = datetime.datetime.utcnow() + else: + dev: Device = session.query(Device).filter(Device.hostname == host).one() + dev.synchronized = True + dev.last_seen = datetime.datetime.utcnow() + + logger.info("Releasing lock for devices from syncto job: {} (in commit-job {})".format(prev_job_id, job_id)) + Joblock.release_lock(session, job_id=prev_job_id) + + return NornirJobResult(nrresult=nrresult) + + @job_wrapper def sync_devices( hostnames: Optional[List[str]] = None, @@ -608,25 +702,7 @@ def sync_devices( """ logger = get_logger() nr = cnaas_init() - dev_count = 0 - skipped_hostnames = [] - if hostnames: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, hostname=hostnames) - else: - if device_type: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, device_type=device_type) - elif group: - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync, group=group) - else: - # all devices - nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, resync=resync) - - if skipped_hostnames: - logger.info( - "Device(s) already synchronized, skipping ({}): {}".format( - len(skipped_hostnames), ", ".join(skipped_hostnames) - ) - ) + nr_filtered, dev_count, skipped_hostnames = select_devices(nr, hostnames, device_type, group, resync) device_list = list(nr_filtered.inventory.hosts.keys()) 
logger.info("Device(s) selected for synchronization ({}): {}".format(dev_count, ", ".join(device_list))) @@ -689,6 +765,7 @@ def sync_devices( change_scores.append(0) logger.debug("Empty diff for host {}, 0 change score".format(host)) + # break into separate function? nr_confighash = None if dry_run and force: # update config hash for devices that had an empty diff because local @@ -729,7 +806,8 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = False dev.last_seen = datetime.datetime.utcnow() - else: + # if next job will commit, that job will mark synchronized on success + elif api_settings.COMMIT_CONFIRMED_MODE != 2: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() @@ -737,7 +815,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() - if not dry_run: + if not dry_run and api_settings.COMMIT_CONFIRMED_MODE != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) @@ -772,6 +850,20 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) + elif api_settings.COMMIT_CONFIRMED_MODE == 2: + if not changed_hosts: + logger.info("None of the selected host has any changes (diff), skipping commit-confirm") + logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) + Joblock.release_lock(session, job_id=job_id) + else: + scheduler = Scheduler() + next_job_id = scheduler.add_onetime_job( + 
"cnaas_nms.devicehandler.sync_devices:confirm_devices", + when=0, + scheduled_by=scheduled_by, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, + ) + logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id, change_score=total_change_score) From 2d4a7d085c7ff48f0b6859d788ae6c3891761742 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 16 Jan 2023 11:31:01 +0100 Subject: [PATCH 038/169] let nornir handle connection open/close when doing commit_confirm_mode 2 instead of closing connection immediately. warn if syncing more than 50 devices, connection timeouts might cause issues? --- src/cnaas_nms/devicehandler/sync_devices.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index f78cff32..f3018106 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -445,7 +445,8 @@ def push_sync_device( "Synchronize device config for host: {} ({}:{})".format(task.host.name, task.host.hostname, task.host.port) ) - task.host.open_connection("napalm", configuration=task.nornir.config) + if api_settings.COMMIT_CONFIRMED_MODE != 2: + task.host.open_connection("napalm", configuration=task.nornir.config) task_args = { "name": "Sync device config", "replace": True, @@ -461,7 +462,8 @@ def push_sync_device( task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id task.run(**task_args) - task.host.close_connection("napalm") + if api_settings.COMMIT_CONFIRMED_MODE != 2: + task.host.close_connection("napalm") if task.results[1].diff: config = task.results[1].host["config"] @@ -609,6 +611,10 @@ def select_devices( len(skipped_hostnames), ", ".join(skipped_hostnames) ) ) + + if dev_count > 50 and api_settings.COMMIT_CONFIRMED_MODE == 2: + logger.warning("commit_confirmed_mode 2 might not be 
reliable for syncs of more than 50 devices") + return nr_filtered, dev_count, skipped_hostnames From a73c7d4d75559a75feb5a7b057a9fc6e85c3279c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 10:32:02 +0100 Subject: [PATCH 039/169] allow overriding of settings for testing purpose. add tests for syncto commitconfirm mode 1 --- src/cnaas_nms/app_settings.py | 2 + src/cnaas_nms/devicehandler/sync_devices.py | 8 ++- .../devicehandler/tests/test_syncto.py | 54 +++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/cnaas_nms/devicehandler/tests/test_syncto.py diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 5b7404f7..65904c9e 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -51,6 +51,7 @@ class ApiSettings(BaseSettings): MGMTDOMAIN_RESERVED_COUNT: int = 5 COMMIT_CONFIRMED_MODE: int = 1 COMMIT_CONFIRMED_TIMEOUT: int = 300 + SETTINGS_OVERRIDE: Optional[dict] = None def construct_api_settings() -> ApiSettings: @@ -80,6 +81,7 @@ def construct_api_settings() -> ApiSettings: MGMTDOMAIN_RESERVED_COUNT=config.get("mgmtdomain_reserved_count", 5), COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), + SETTINGS_OVERRIDE=config.get("settings_override", None), ) else: return ApiSettings() diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index f3018106..33e05c57 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -337,11 +337,15 @@ def populate_device_vars( # the list of configuration variables. The idea is to store secret # configuration outside of the templates repository. 
template_secrets = get_environment_secrets() + # For testing purposes allow overriding of settings instead of requiring git updates + override_dict = {} + if api_settings.SETTINGS_OVERRIDE and isinstance(api_settings.SETTINGS_OVERRIDE, dict): + override_dict = api_settings.SETTINGS_OVERRIDE # Merge all dicts with variables into one, later row overrides # Device variables override any names from settings, for example the # interfaces list from settings are replaced with an interface list from # device variables that contains more information - device_variables = {**settings, **device_variables, **template_secrets} + device_variables = {**settings, **device_variables, **template_secrets, **override_dict} return device_variables @@ -856,7 +860,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) - elif api_settings.COMMIT_CONFIRMED_MODE == 2: + elif api_settings.COMMIT_CONFIRMED_MODE == 2 and not dry_run: if not changed_hosts: logger.info("None of the selected host has any changes (diff), skipping commit-confirm") logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py new file mode 100644 index 00000000..a4519e40 --- /dev/null +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -0,0 +1,54 @@ +import os +import time +from typing import Optional + +import pkg_resources +import pytest +import yaml + +from cnaas_nms.db.job import Job, JobStatus +from cnaas_nms.db.session import sqla_session +from cnaas_nms.db.settings import api_settings +from cnaas_nms.devicehandler.sync_devices import sync_devices +from cnaas_nms.scheduler.scheduler import Scheduler + + +@pytest.fixture +def testdata(scope="session"): + data_dir = pkg_resources.resource_filename(__name__, 
"data") + with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: + return yaml.safe_load(f_testdata) + + +@pytest.fixture +def scheduler(scope="session"): + scheduler = Scheduler() + scheduler.start() + return scheduler + + +@pytest.mark.integration +def test_syncto_commitmode_1(testdata, scheduler): + api_settings.COMMIT_CONFIRMED_MODE = 1 + api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} + job_id = scheduler.add_onetime_job( + sync_devices, + when=0, + scheduled_by="test_user", + kwargs={ + "hostnames": ["eosdist1"], + }, + ) + job_res: Optional[Job] = None + job_dict: Optional[dict] = None + with sqla_session() as session: + time.sleep(2) + for i in range(1, 5): + if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: + job_res = session.query(Job).filter(Job.id == job_id).one() + job_dict = job_res.as_dict() + else: + break + breakpoint() + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From 0392ff293de59506f38d6ed5d913f9803b96df7c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 10:54:38 +0100 Subject: [PATCH 040/169] fix scheduler state check for integrationtests --- src/cnaas_nms/api/interface.py | 1 - src/cnaas_nms/devicehandler/tests/test_syncto.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/api/interface.py b/src/cnaas_nms/api/interface.py index c7665a45..cbbb0a84 100644 --- a/src/cnaas_nms/api/interface.py +++ b/src/cnaas_nms/api/interface.py @@ -222,7 +222,6 @@ def put(self, hostname): elif "data" in if_dict and not if_dict["data"]: intfdata = None - if intfdata != intfdata_original: intf.data = intfdata updated = True diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index a4519e40..1fd2b025 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ 
b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -5,6 +5,7 @@ import pkg_resources import pytest import yaml +from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -23,7 +24,8 @@ def testdata(scope="session"): @pytest.fixture def scheduler(scope="session"): scheduler = Scheduler() - scheduler.start() + if scheduler.get_scheduler().state == STATE_STOPPED: + scheduler.start() return scheduler @@ -49,6 +51,5 @@ def test_syncto_commitmode_1(testdata, scheduler): job_dict = job_res.as_dict() else: break - breakpoint() assert job_dict["status"] == "FINISHED" assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From a07ebeb6d0f5c5bed1191ea5c2593e728f07a02a Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 11:03:46 +0100 Subject: [PATCH 041/169] fix scheduler state check for integrationtests --- src/cnaas_nms/scheduler/tests/test_scheduler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/scheduler/tests/test_scheduler.py b/src/cnaas_nms/scheduler/tests/test_scheduler.py index 740e4291..96d35404 100644 --- a/src/cnaas_nms/scheduler/tests/test_scheduler.py +++ b/src/cnaas_nms/scheduler/tests/test_scheduler.py @@ -5,6 +5,7 @@ import pkg_resources import pytest import yaml +from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -35,7 +36,8 @@ def requirements(self, postgresql): @classmethod def setUpClass(cls) -> None: scheduler = Scheduler() - scheduler.start() + if scheduler.get_scheduler().state == STATE_STOPPED: + scheduler.start() @classmethod def tearDownClass(cls) -> None: From 8442b02467e0f8374d37c7eedb64033f4afb74e8 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 11:25:26 +0100 Subject: [PATCH 042/169] mark equipment instead of integrationtest for github actions --- 
src/cnaas_nms/devicehandler/tests/test_syncto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 1fd2b025..a7d1be08 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -29,7 +29,7 @@ def scheduler(scope="session"): return scheduler -@pytest.mark.integration +@pytest.mark.equipment def test_syncto_commitmode_1(testdata, scheduler): api_settings.COMMIT_CONFIRMED_MODE = 1 api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} From 7a435b4eea91b32bf0ec7fa4d9d89e2620e1ce98 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 18 Jan 2023 17:03:17 +0100 Subject: [PATCH 043/169] run syncto with mode 0, 1 and 2. check captured logging data for correct mode --- src/cnaas_nms/devicehandler/sync_devices.py | 5 +++ .../devicehandler/tests/data/testdata.yml | 4 ++ .../devicehandler/tests/test_syncto.py | 43 ++++++++++++++++--- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 33e05c57..9d200de8 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -465,6 +465,11 @@ def push_sync_device( else: task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id + logger.debug( + "Commit confirm mode for host {}: {} (dry_run: {})".format( + task.host.name, api_settings.COMMIT_CONFIRMED_MODE, dry_run + ) + ) task.run(**task_args) if api_settings.COMMIT_CONFIRMED_MODE != 2: task.host.close_connection("napalm") diff --git a/src/cnaas_nms/devicehandler/tests/data/testdata.yml b/src/cnaas_nms/devicehandler/tests/data/testdata.yml index 16a82fcf..0d4c6ee2 100644 --- a/src/cnaas_nms/devicehandler/tests/data/testdata.yml +++ b/src/cnaas_nms/devicehandler/tests/data/testdata.yml @@ -85,3 
+85,7 @@ linknets_mlag_nonpeers: ipv4_network: null redundant_link: true site_id: null +syncto_device_hostnames: + - "eosdist1" +syncto_settings_override: + cli_append_str: "interface Management1\ndescription test" diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index a7d1be08..7bbdee69 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -1,3 +1,4 @@ +import logging import os import time from typing import Optional @@ -29,16 +30,13 @@ def scheduler(scope="session"): return scheduler -@pytest.mark.equipment -def test_syncto_commitmode_1(testdata, scheduler): - api_settings.COMMIT_CONFIRMED_MODE = 1 - api_settings.SETTINGS_OVERRIDE = {"cli_append_str": "interface Management1\ndescription test"} +def run_syncto_job(scheduler, testdata: dict) -> Optional[dict]: job_id = scheduler.add_onetime_job( sync_devices, when=0, scheduled_by="test_user", kwargs={ - "hostnames": ["eosdist1"], + "hostnames": testdata["syncto_device_hostnames"], }, ) job_res: Optional[Job] = None @@ -51,5 +49,40 @@ def test_syncto_commitmode_1(testdata, scheduler): job_dict = job_res.as_dict() else: break + return job_dict + + +@pytest.mark.equipment +def test_syncto_commitmode_0(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + api_settings.COMMIT_CONFIRMED_MODE = 0 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 0" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + + +@pytest.mark.equipment +def test_syncto_commitmode_1(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + 
api_settings.COMMIT_CONFIRMED_MODE = 1 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 1" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + + +@pytest.mark.equipment +def test_syncto_commitmode_2(testdata, scheduler, settings_directory, templates_directory, postgresql, redis, caplog): + api_settings.COMMIT_CONFIRMED_MODE = 2 + api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata) + hostname = testdata["syncto_device_hostnames"][0] + assert f"Commit confirm mode for host {hostname}: 2" in caplog.text assert job_dict["status"] == "FINISHED" assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False From 30ff29c5679228abe5590bb6faaa92fbe22a66ca Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 19 Jan 2023 16:37:50 +0100 Subject: [PATCH 044/169] try to get code coverage for the confirm_devices function --- .../devicehandler/tests/test_syncto.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 7bbdee69..b2df9c99 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -30,13 +30,14 @@ def scheduler(scope="session"): return scheduler -def run_syncto_job(scheduler, testdata: dict) -> Optional[dict]: +def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: job_id = scheduler.add_onetime_job( sync_devices, when=0, scheduled_by="test_user", kwargs={ "hostnames": testdata["syncto_device_hostnames"], + "dry_run": dry_run, }, ) job_res: 
Optional[Job] = None @@ -61,7 +62,7 @@ def test_syncto_commitmode_0(testdata, scheduler, settings_directory, templates_ hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 0" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False @pytest.mark.equipment @@ -73,7 +74,7 @@ def test_syncto_commitmode_1(testdata, scheduler, settings_directory, templates_ hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 1" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False @pytest.mark.equipment @@ -81,8 +82,16 @@ def test_syncto_commitmode_2(testdata, scheduler, settings_directory, templates_ api_settings.COMMIT_CONFIRMED_MODE = 2 api_settings.SETTINGS_OVERRIDE = testdata["syncto_settings_override"] with caplog.at_level(logging.DEBUG): - job_dict = run_syncto_job(scheduler, testdata) + job_dict = run_syncto_job(scheduler, testdata, dry_run=False) hostname = testdata["syncto_device_hostnames"][0] assert f"Commit confirm mode for host {hostname}: 2" in caplog.text assert job_dict["status"] == "FINISHED" - assert job_dict["result"]["devices"]["eosdist1"]["failed"] is False + assert job_dict["result"]["devices"][hostname]["failed"] is False + + # Revert change + api_settings.SETTINGS_OVERRIDE = None + with caplog.at_level(logging.DEBUG): + job_dict = run_syncto_job(scheduler, testdata, dry_run=False) + assert "selected for commit-confirm" in caplog.text + assert job_dict["status"] == "FINISHED" + assert job_dict["result"]["devices"][hostname]["failed"] is False From b0e2c52684d32a6e14762e9a6d5dd6833059db0b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 09:24:14 +0100 Subject: [PATCH 045/169] 
longer timeout since sync doesn't finish on kvm vm --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index b2df9c99..f4a2e159 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -44,7 +44,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None with sqla_session() as session: time.sleep(2) - for i in range(1, 5): + for i in range(1, 15): if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: job_res = session.query(Job).filter(Job.id == job_id).one() job_dict = job_res.as_dict() From 23763f204417a9f65fdfc6e81826eda85c1c87a3 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 14:43:26 +0100 Subject: [PATCH 046/169] resync=True to get device to new config with settings_override even if already synced --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index f4a2e159..dc1c7ec3 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -38,6 +38,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ kwargs={ "hostnames": testdata["syncto_device_hostnames"], "dry_run": dry_run, + "resync": True, }, ) job_res: Optional[Job] = None From 8553f4d53b8f23a1aae6d975cba0cbefefd80e86 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 17:39:22 +0100 Subject: [PATCH 047/169] reattempt acquire joblock, sometimes unittest can fail otherwise --- src/cnaas_nms/devicehandler/sync_devices.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git 
a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 9d200de8..cd246282 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -1,5 +1,6 @@ import datetime import os +import time from hashlib import sha256 from ipaddress import IPv4Address, IPv4Interface from typing import List, Optional, Tuple @@ -739,7 +740,15 @@ def sync_devices( if not dry_run: with sqla_session() as session: logger.info("Trying to acquire lock for devices to run syncto job: {}".format(job_id)) - if not Joblock.acquire_lock(session, name="devices", job_id=job_id): + max_attempts = 5 + lock_ok: bool = False + for i in range(max_attempts): + lock_ok = Joblock.acquire_lock(session, name="devices", job_id=job_id) + if lock_ok: + break + else: + time.sleep(2) + if not lock_ok: raise JoblockError("Unable to acquire lock for configuring devices") try: From 92c2d47338183a94f933611fb448b6dba7cc4d8a Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 23 Jan 2023 17:40:37 +0100 Subject: [PATCH 048/169] syncto tests: sleep in between getting jobstatus. wait for next_job to finish for commit confirm mode 2. 
--- .../devicehandler/tests/test_syncto.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index dc1c7ec3..b33925f4 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -43,13 +43,24 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ ) job_res: Optional[Job] = None job_dict: Optional[dict] = None + jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - time.sleep(2) - for i in range(1, 15): - if not job_res or job_res.status == JobStatus.SCHEDULED or job_res.status == JobStatus.RUNNING: - job_res = session.query(Job).filter(Job.id == job_id).one() + for i in range(1, 30): + time.sleep(1) + if not job_res or job_res.status in jobstatus_wait: + job_res: Job = session.query(Job).filter(Job.id == job_id).one() job_dict = job_res.as_dict() + # if next_job_id scheduled for confirm action, wait for that also + if job_res.next_job_id: + next_job_res = Optional[Job] = None + for j in range(1, 30): + time.sleep(1) + if not next_job_res or next_job_res.status in jobstatus_wait: + next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + else: + break else: + print("test run_syncto_job run {} status {}".format(i, JobStatus.name)) break return job_dict From 22c2929b8006bfbe51dcb4a5456a2fd0a5ead5c6 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 24 Jan 2023 16:00:14 +0100 Subject: [PATCH 049/169] more verbose logging for pytest. 
test super-long timeout for config test --- docker/api/pytest.sh | 2 +- src/cnaas_nms/devicehandler/tests/test_syncto.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/api/pytest.sh b/docker/api/pytest.sh index fef7da3f..19c034fa 100755 --- a/docker/api/pytest.sh +++ b/docker/api/pytest.sh @@ -1,6 +1,6 @@ #!/bin/bash -PYTESTARGS=() +PYTESTARGS=("-vv" "--showlocals") if [ ! -z "$NO_EQUIPMENTTEST" ] ; then PYTESTARGS+=("-m" "not equipment") diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index b33925f4..bcda4d6d 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -45,7 +45,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - for i in range(1, 30): + for i in range(1, 300): time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() @@ -53,7 +53,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: next_job_res = Optional[Job] = None - for j in range(1, 30): + for j in range(1, 300): time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() From e787551960787e9f9a598ab8848995f9e02e597b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 09:35:26 +0100 Subject: [PATCH 050/169] cleanup after scheduler fixture. 
print jobstatus from test if not == finished --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index bcda4d6d..eb103195 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -13,6 +13,7 @@ from cnaas_nms.db.settings import api_settings from cnaas_nms.devicehandler.sync_devices import sync_devices from cnaas_nms.scheduler.scheduler import Scheduler +from cnaas_nms.tools.log import get_logger @pytest.fixture @@ -27,10 +28,14 @@ def scheduler(scope="session"): scheduler = Scheduler() if scheduler.get_scheduler().state == STATE_STOPPED: scheduler.start() - return scheduler + yield scheduler + time.sleep(3) + scheduler.get_scheduler().print_jobs() + scheduler.shutdown() def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: + logger = get_logger() job_id = scheduler.add_onetime_job( sync_devices, when=0, @@ -45,7 +50,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict: Optional[dict] = None jobstatus_wait = [JobStatus.SCHEDULED, JobStatus.RUNNING] with sqla_session() as session: - for i in range(1, 300): + for i in range(1, 30): time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() @@ -53,15 +58,16 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: next_job_res = Optional[Job] = None - for j in range(1, 300): + for j in range(1, 30): time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() else: break else: - print("test run_syncto_job run {} status 
{}".format(i, JobStatus.name)) break + if job_dict["status"] != "FINISHED": + logger.debug("test run_syncto_job job status '{}': {}".format(job_dict["status"], job_dict)) return job_dict From 63babf289bd22a220da065b95d0dca722552dd18 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 14:24:44 +0100 Subject: [PATCH 051/169] make sure to refresh session objects between polls --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index eb103195..d0923573 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -6,7 +6,6 @@ import pkg_resources import pytest import yaml -from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session @@ -24,10 +23,9 @@ def testdata(scope="session"): @pytest.fixture -def scheduler(scope="session"): +def scheduler(scope="module"): scheduler = Scheduler() - if scheduler.get_scheduler().state == STATE_STOPPED: - scheduler.start() + scheduler.start() yield scheduler time.sleep(3) scheduler.get_scheduler().print_jobs() @@ -54,6 +52,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ time.sleep(1) if not job_res or job_res.status in jobstatus_wait: job_res: Job = session.query(Job).filter(Job.id == job_id).one() + session.refresh(job_res) job_dict = job_res.as_dict() # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: @@ -62,6 +61,7 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ time.sleep(1) if not next_job_res or next_job_res.status in jobstatus_wait: next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + session.refresh(next_job_res) else: break else: From 
bb5d212547a581d4752f314fe01301472deacf68 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 25 Jan 2023 16:35:59 +0100 Subject: [PATCH 052/169] add scheduler fixture to global conftest for pytest, and make both syncto and test scheduler use that one with scope=session to only have one scheduler. rework test_scheduler to pytest format. --- .../devicehandler/tests/test_syncto.py | 13 +-- .../scheduler/tests/test_scheduler.py | 104 ++++++------------ src/conftest.py | 12 ++ 3 files changed, 48 insertions(+), 81 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index d0923573..0d7b9b46 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -11,27 +11,16 @@ from cnaas_nms.db.session import sqla_session from cnaas_nms.db.settings import api_settings from cnaas_nms.devicehandler.sync_devices import sync_devices -from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.tools.log import get_logger @pytest.fixture -def testdata(scope="session"): +def testdata(scope="module"): data_dir = pkg_resources.resource_filename(__name__, "data") with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: return yaml.safe_load(f_testdata) -@pytest.fixture -def scheduler(scope="module"): - scheduler = Scheduler() - scheduler.start() - yield scheduler - time.sleep(3) - scheduler.get_scheduler().print_jobs() - scheduler.shutdown() - - def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[dict]: logger = get_logger() job_id = scheduler.add_onetime_job( diff --git a/src/cnaas_nms/scheduler/tests/test_scheduler.py b/src/cnaas_nms/scheduler/tests/test_scheduler.py index 96d35404..0b9b78bd 100644 --- a/src/cnaas_nms/scheduler/tests/test_scheduler.py +++ b/src/cnaas_nms/scheduler/tests/test_scheduler.py @@ -1,16 +1,10 @@ -import os import time -import unittest -import pkg_resources import pytest -import 
yaml -from apscheduler.schedulers.base import STATE_STOPPED from cnaas_nms.db.job import Job, JobStatus from cnaas_nms.db.session import sqla_session from cnaas_nms.scheduler.jobresult import DictJobResult -from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.scheduler.wrapper import job_wrapper @@ -27,68 +21,40 @@ def job_testfunc_exception(text="", job_id=None, scheduled_by=None): @pytest.mark.integration -class InitTests(unittest.TestCase): - @pytest.fixture(autouse=True) - def requirements(self, postgresql): - """Ensures the required pytest fixtures are loaded implicitly for all these tests""" - pass +def test_add_schedule(postgresql, scheduler): + job1_id = scheduler.add_onetime_job( + job_testfunc_success, when=1, scheduled_by="test_user", kwargs={"text": "success"} + ) + job2_id = scheduler.add_onetime_job( + job_testfunc_exception, when=1, scheduled_by="test_user", kwargs={"text": "exception"} + ) + assert isinstance(job1_id, int) + assert isinstance(job2_id, int) + print(f"Test job 1 scheduled as ID { job1_id }") + print(f"Test job 2 scheduled as ID { job2_id }") + time.sleep(3) + with sqla_session() as session: + job1 = session.query(Job).filter(Job.id == job1_id).one_or_none() + assert isinstance(job1, Job), "Test job 1 could not be found" + assert job1.status == JobStatus.FINISHED, "Test job 1 did not finish" + assert job1.result == {"status": "success"}, "Test job 1 returned bad status" + job2 = session.query(Job).filter(Job.id == job2_id).one_or_none() + assert isinstance(job2, Job), "Test job 2 could not be found" + assert job2.status == JobStatus.EXCEPTION, "Test job 2 did not make exception" + assert "message" in job2.exception, "Test job 2 did not contain message in exception" - @classmethod - def setUpClass(cls) -> None: - scheduler = Scheduler() - if scheduler.get_scheduler().state == STATE_STOPPED: - scheduler.start() - @classmethod - def tearDownClass(cls) -> None: - scheduler = Scheduler() - time.sleep(3) - 
scheduler.get_scheduler().print_jobs() - scheduler.shutdown() - - def setUp(self): - data_dir = pkg_resources.resource_filename(__name__, "data") - with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: - self.testdata = yaml.safe_load(f_testdata) - - def test_add_schedule(self): - scheduler = Scheduler() - job1_id = scheduler.add_onetime_job( - job_testfunc_success, when=1, scheduled_by="test_user", kwargs={"text": "success"} - ) - job2_id = scheduler.add_onetime_job( - job_testfunc_exception, when=1, scheduled_by="test_user", kwargs={"text": "exception"} - ) - assert isinstance(job1_id, int) - assert isinstance(job2_id, int) - print(f"Test job 1 scheduled as ID { job1_id }") - print(f"Test job 2 scheduled as ID { job2_id }") - time.sleep(3) - with sqla_session() as session: - job1 = session.query(Job).filter(Job.id == job1_id).one_or_none() - self.assertIsInstance(job1, Job, "Test job 1 could not be found") - self.assertEqual(job1.status, JobStatus.FINISHED, "Test job 1 did not finish") - self.assertEqual(job1.result, {"status": "success"}, "Test job 1 returned bad status") - job2 = session.query(Job).filter(Job.id == job2_id).one_or_none() - self.assertIsInstance(job2, Job, "Test job 2 could not be found") - self.assertEqual(job2.status, JobStatus.EXCEPTION, "Test job 2 did not make exception") - self.assertIn("message", job2.exception, "Test job 2 did not contain message in exception") - - def test_abort_schedule(self): - scheduler = Scheduler() - job3_id = scheduler.add_onetime_job( - job_testfunc_success, when=600, scheduled_by="test_user", kwargs={"text": "abort"} - ) - assert isinstance(job3_id, int) - print(f"Test job 3 scheduled as ID { job3_id }") - scheduler.remove_scheduled_job(job3_id) - time.sleep(3) - with sqla_session() as session: - job3 = session.query(Job).filter(Job.id == job3_id).one_or_none() - self.assertIsInstance(job3, Job, "Test job 3 could not be found") - self.assertEqual(job3.status, JobStatus.ABORTED, "Test job 3 did 
not abort") - self.assertEqual(job3.result, {"message": "removed"}, "Test job 3 returned bad status") - - -if __name__ == "__main__": - unittest.main() +@pytest.mark.integration +def test_abort_schedule(postgresql, scheduler): + job3_id = scheduler.add_onetime_job( + job_testfunc_success, when=600, scheduled_by="test_user", kwargs={"text": "abort"} + ) + assert isinstance(job3_id, int) + print(f"Test job 3 scheduled as ID { job3_id }") + scheduler.remove_scheduled_job(job3_id) + time.sleep(3) + with sqla_session() as session: + job3 = session.query(Job).filter(Job.id == job3_id).one_or_none() + assert isinstance(job3, Job), "Test job 3 could not be found" + assert job3.status == JobStatus.ABORTED, "Test job 3 did not abort" + assert job3.result == {"message": "removed"}, "Test job 3 returned bad status" diff --git a/src/conftest.py b/src/conftest.py index abd55c5d..a7fb9f33 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -5,6 +5,8 @@ from contextlib import closing import pytest + +from cnaas_nms.scheduler.scheduler import Scheduler from git import Repo @@ -92,3 +94,13 @@ def wait_for_port(host: str, port: int, tries=10) -> bool: time.sleep(0.5) print(f"NO RESPONSE from {host}:{port}") return False + + +@pytest.fixture +def scheduler(scope="session"): + scheduler = Scheduler() + scheduler.start() + yield scheduler + time.sleep(3) + scheduler.get_scheduler().print_jobs() + scheduler.shutdown() From 824d0ddd944157ceed383982c26baa2cf6fce36a Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 26 Jan 2023 17:17:54 +0100 Subject: [PATCH 053/169] scope should be on decorator, not on function --- src/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/conftest.py b/src/conftest.py index a7fb9f33..70ec6497 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -96,8 +96,8 @@ def wait_for_port(host: str, port: int, tries=10) -> bool: return False -@pytest.fixture -def scheduler(scope="session"): +@pytest.fixture(scope="session") 
+def scheduler(): scheduler = Scheduler() scheduler.start() yield scheduler From b448ebe3597d8aeb6d34d05fc9d33eac68f4587b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 09:25:53 +0100 Subject: [PATCH 054/169] fix next_job_res typing, log bad status for confirm job as well --- src/cnaas_nms/devicehandler/tests/test_syncto.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/tests/test_syncto.py b/src/cnaas_nms/devicehandler/tests/test_syncto.py index 0d7b9b46..e627a9d2 100644 --- a/src/cnaas_nms/devicehandler/tests/test_syncto.py +++ b/src/cnaas_nms/devicehandler/tests/test_syncto.py @@ -45,18 +45,22 @@ def run_syncto_job(scheduler, testdata: dict, dry_run: bool = True) -> Optional[ job_dict = job_res.as_dict() # if next_job_id scheduled for confirm action, wait for that also if job_res.next_job_id: - next_job_res = Optional[Job] = None + confirm_job_res: Optional[Job] = None + confirm_job_dict: Optional[dict] = None for j in range(1, 30): time.sleep(1) - if not next_job_res or next_job_res.status in jobstatus_wait: - next_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() - session.refresh(next_job_res) + if not confirm_job_res or confirm_job_res.status in jobstatus_wait: + confirm_job_res = session.query(Job).filter(Job.id == job_res.next_job_id).one() + session.refresh(confirm_job_res) + confirm_job_dict = confirm_job_res.as_dict() else: break + if confirm_job_dict and confirm_job_dict["status"] != "FINISHED": + logger.warning("test run_syncto_job confirm job bad status: {}".format(confirm_job_dict)) else: break if job_dict["status"] != "FINISHED": - logger.debug("test run_syncto_job job status '{}': {}".format(job_dict["status"], job_dict)) + logger.warning("test run_syncto_job job bad status: {}".format(job_dict)) return job_dict From 789d99b8ddb6a60de7e93d432540a4ff6dcc58d5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 13:40:28 +0100 
Subject: [PATCH 055/169] support commit mode 2 for junos --- src/cnaas_nms/devicehandler/sync_devices.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index cd246282..e4cf10c7 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -7,6 +7,7 @@ import yaml from napalm.eos import EOSDriver as NapalmEOSDriver +from napalm.junos import JunOSDriver as NapalmJunOSDriver from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file @@ -364,7 +365,7 @@ def napalm_configure_confirmed( n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False if api_settings.COMMIT_CONFIRMED_MODE == 2: - if isinstance(n_device, NapalmEOSDriver): + if isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: logger.warn( @@ -388,6 +389,8 @@ def napalm_confirm_commit(task, prev_job_id: int = 0): if isinstance(n_device, NapalmEOSDriver): n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() + elif isinstance(n_device, NapalmJunOSDriver): + n_device.confirm_commit() def push_sync_device( From 68505012307d85f6ca1ff861d80da482a7cb0661 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 13:45:20 +0100 Subject: [PATCH 056/169] make sure confirm job gets scheduled_by string --- src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index e4cf10c7..0b894dae 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -888,7 +888,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): "cnaas_nms.devicehandler.sync_devices:confirm_devices", 
when=0, scheduled_by=scheduled_by, - kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts, "scheduled_by": scheduled_by}, ) logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") From d52ac38996cf6388da6833bc5575ac6d3452b6bc Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:05:29 +0100 Subject: [PATCH 057/169] allow calling syncto with confirm_mode_override to override setting specified in yaml --- src/cnaas_nms/devicehandler/sync_devices.py | 44 +++++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0b894dae..9f5e7c50 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -351,8 +351,24 @@ def populate_device_vars( return device_variables +def get_confirm_mode(confirm_mode_override: Optional[int] = None) -> int: + valid_modes = [0, 1, 2] + if confirm_mode_override and confirm_mode_override in valid_modes: + return confirm_mode_override + elif api_settings.COMMIT_CONFIRMED_MODE and api_settings.COMMIT_CONFIRMED_MODE in valid_modes: + return api_settings.COMMIT_CONFIRMED_MODE + else: + return 1 + + def napalm_configure_confirmed( - task, dry_run=None, configuration=None, replace=None, commit_message: str = "", job_id: int = 0 + task, + dry_run=None, + configuration=None, + replace=None, + commit_message: str = "", + job_id: int = 0, + commit_confirm_override: Optional[int] = None, ): """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() @@ -364,7 +380,7 @@ def napalm_configure_confirmed( if diff: n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False - if api_settings.COMMIT_CONFIRMED_MODE == 2: + if get_confirm_mode(commit_confirm_override) == 2: if 
isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: @@ -373,7 +389,7 @@ def napalm_configure_confirmed( f"Falling back to mode 1 for device: {task.host.name}." ) - if api_settings.COMMIT_CONFIRMED_MODE == 1 or not mode_2_supported: + if get_confirm_mode(commit_confirm_override) == 1 or not mode_2_supported: if n_device.has_pending_commit(): n_device.confirm_commit() else: @@ -399,6 +415,7 @@ def push_sync_device( generate_only: bool = False, job_id: Optional[str] = None, scheduled_by: Optional[str] = None, + confirm_mode_override: Optional[int] = None, ): """ Nornir task to generate config and push to device @@ -408,7 +425,9 @@ def push_sync_device( dry_run: Don't commit config to device, just do compare/diff generate_only: Only generate text config, don't try to commit or even do dry_run compare to running config - + job_id: Job ID integer + scheduled_by: username of users that scheduled job + confirm_mode_override: integer to specify commit confirm mode Returns: """ @@ -469,6 +488,7 @@ def push_sync_device( else: task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id + task_args["confirm_mode_override"] = confirm_mode_override logger.debug( "Commit confirm mode for host {}: {} (dry_run: {})".format( task.host.name, api_settings.COMMIT_CONFIRMED_MODE, dry_run @@ -698,6 +718,7 @@ def sync_devices( job_id: Optional[int] = None, scheduled_by: Optional[str] = None, resync: bool = False, + confirm_mode_override: Optional[int] = None, ) -> NornirJobResult: """Synchronize devices to their respective templates. 
If no arguments are specified then synchronize all devices that are currently out @@ -715,6 +736,8 @@ def sync_devices( scheduled_by: Username from JWT resync: Re-synchronize a device even if it's marked as synced in the database, a device selected by hostname is always re-synced + confirm_mode_override: Override settings commit confirm mode, optional int + with value 0, 1 or 2 Returns: NornirJobResult @@ -755,7 +778,12 @@ def sync_devices( raise JoblockError("Unable to acquire lock for configuring devices") try: - nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run, job_id=job_id) + nrresult = nr_filtered.run( + task=push_sync_device, + dry_run=dry_run, + job_id=job_id, + confirm_mode_override=get_confirm_mode(confirm_mode_override), + ) except Exception as e: logger.exception("Exception while synchronizing devices: {}".format(str(e))) try: @@ -834,7 +862,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev.synchronized = False dev.last_seen = datetime.datetime.utcnow() # if next job will commit, that job will mark synchronized on success - elif api_settings.COMMIT_CONFIRMED_MODE != 2: + elif get_confirm_mode(confirm_mode_override) != 2: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() @@ -842,7 +870,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True dev.last_seen = datetime.datetime.utcnow() - if not dry_run and api_settings.COMMIT_CONFIRMED_MODE != 2: + if not dry_run and get_confirm_mode(confirm_mode_override) != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) @@ -877,7 +905,7 @@ def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): f"Auto-push of config to device {hostnames} failed because change score of " 
f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) - elif api_settings.COMMIT_CONFIRMED_MODE == 2 and not dry_run: + elif get_confirm_mode(confirm_mode_override) == 2 and not dry_run: if not changed_hosts: logger.info("None of the selected host has any changes (diff), skipping commit-confirm") logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) From afb722bfc3026227713b7994ebb80c9b3cc6aa49 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:43:46 +0100 Subject: [PATCH 058/169] add API parameter for confirm_mode --- src/cnaas_nms/api/device.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index a451c9b1..42cc7880 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -116,6 +116,7 @@ "force": fields.Boolean(required=False), "auto_push": fields.Boolean(required=False), "resync": fields.Boolean(required=False), + "confirm_mode": fields.Integer(required=False), }, ) @@ -623,6 +624,16 @@ def post(self): kwargs["job_comment"] = json_data["comment"] if "ticket_ref" in json_data and isinstance(json_data["ticket_ref"], str): kwargs["job_ticket_ref"] = json_data["ticket_ref"] + if "confirm_mode" in json_data and isinstance(json_data["confirm_mode"], int): + if 0 >= json_data["confirm_mode"] >= 2: + kwargs["confirm_mode_override"] = json_data["confirm_mode"] + else: + return ( + empty_result( + status="error", data="If optional value confirm_mode is specified it must be an integer 0-2" + ), + 400, + ) total_count: Optional[int] = None nr = cnaas_init() From 3d136e99ae92464b161d8794080e64c6f6b4b25e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:44:13 +0100 Subject: [PATCH 059/169] docs for confirm_mode in syncto API --- docs/apiref/syncto.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/apiref/syncto.rst b/docs/apiref/syncto.rst index d2a25675..f7e8b09e 
100644 --- a/docs/apiref/syncto.rst +++ b/docs/apiref/syncto.rst @@ -36,6 +36,23 @@ The status success in this case only means that the job was scheduled successful you have to poll the job API to see that result of what was done, the job itself might still fail. +Configuration changes can be made in a way that requires a separate confirm call since version 1.5. +If the change can not be confirmed because the device is not unreachable for example, the device +will roll back the configuration. Before version 1.5 this concept was not supported, but from this +version it's supported and enabled by default using mode 1. + +Commit confirm modes: + - 0 = No confirm commit (default up to version 1.4) + - 1 = Commit is immediately confirmed for each device when that device is configured + (default from version 1.5) + - 2 = Commit is confirmed after all devices in the job has been configured, but only if all were + successful. This mode is only supported for EOS and JunOS so far, and only supported for small + number of devices per commit (max 50). If mode 2 is specified and an unsupported device is + selected that device will use mode 1 instead. + +Commit confirm mode can be specified in the configuration file, but it's also possible to override +that setting for a specific job using the API argument confirm_mode (see below). + Arguments: ---------- @@ -59,6 +76,8 @@ Arguments: This should be a string with max 255 characters. - ticket_ref: Optionally reference a service ticket associated with this job. This should be a string with max 32 characters. + - confirm_mode: Optionally override the default commit confirm mode (see above) for this job. + Must be an integer 0, 1 or 2 if specified. If neither hostname or device_type is specified all devices that needs to be sycnhronized will be selected. 
From ed20ea7e7df4891aed57771d60fc9663c16705d9 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:50:34 +0100 Subject: [PATCH 060/169] docs for commit_confirm_mode and commit_confirmed_timeout settings --- docs/apiref/syncto.rst | 2 ++ docs/configuration/index.rst | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/docs/apiref/syncto.rst b/docs/apiref/syncto.rst index f7e8b09e..662266dc 100644 --- a/docs/apiref/syncto.rst +++ b/docs/apiref/syncto.rst @@ -41,6 +41,8 @@ If the change can not be confirmed because the device is not unreachable for exa will roll back the configuration. Before version 1.5 this concept was not supported, but from this version it's supported and enabled by default using mode 1. +.. _commit_confirm_modes: + Commit confirm modes: - 0 = No confirm commit (default up to version 1.4) - 1 = Commit is immediately confirmed for each device when that device is configured diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index e0531507..ac33be8b 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -38,6 +38,10 @@ Defines parameters for the API: each defined management domain when assigning new management IP addresses to devices. Defaults to 5 (e.g. meaning 10.0.0.1 through 10.0.0.5 would remain unassigned on a domain for 10.0.0.0/24). +- commit_confirmed_mode: Integer specifying default commit confirm mode + (see :ref:`commit_confirm_modes`). Defaults to 1. +- commit_confirmed_timeout: Time to wait before rolling back an unconfirmed commit, + specified in seconds. Defaults to 300. 
/etc/cnaas-nms/repository.yml ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 773f5f7b54b5cf033470e0eb3f75d11da9170e1c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 14:51:46 +0100 Subject: [PATCH 061/169] default api.yml settings for commit_confirmed --- docker/api/config/api.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/api/config/api.yml b/docker/api/config/api.yml index ddaf0f39..40754df9 100644 --- a/docker/api/config/api.yml +++ b/docker/api/config/api.yml @@ -9,3 +9,5 @@ certpath: /tmp/devicecerts/ global_unique_vlans: True init_mgmt_timeout: 30 mgmtdomain_reserved_count: 5 +commit_confirmed_mode: 1 +commit_confirmed_timeout: 300 From f436fbf95d028780fbac593e6b7f2a2c065535c5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 15:16:00 +0100 Subject: [PATCH 062/169] fix argument name confirm_mode_override --- src/cnaas_nms/devicehandler/sync_devices.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 9f5e7c50..62ae47d0 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -368,7 +368,7 @@ def napalm_configure_confirmed( replace=None, commit_message: str = "", job_id: int = 0, - commit_confirm_override: Optional[int] = None, + confirm_mode_override: Optional[int] = None, ): """Configure device and set configure confirmed timeout to revert changes unless a confirm is received""" logger = get_logger() @@ -380,7 +380,7 @@ def napalm_configure_confirmed( if diff: n_device.commit_config(revert_in=api_settings.COMMIT_CONFIRMED_TIMEOUT) mode_2_supported = False - if get_confirm_mode(commit_confirm_override) == 2: + if get_confirm_mode(confirm_mode_override) == 2: if isinstance(n_device, (NapalmEOSDriver, NapalmJunOSDriver)): mode_2_supported = True else: @@ -389,7 +389,7 @@ def napalm_configure_confirmed( f"Falling back to mode 1 for device: 
{task.host.name}." ) - if get_confirm_mode(commit_confirm_override) == 1 or not mode_2_supported: + if get_confirm_mode(confirm_mode_override) == 1 or not mode_2_supported: if n_device.has_pending_commit(): n_device.confirm_commit() else: From d6d2369349361173af830dbd37d432a22cc7896f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 27 Jan 2023 15:20:21 +0100 Subject: [PATCH 063/169] log commit confirmed complete --- src/cnaas_nms/devicehandler/sync_devices.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 62ae47d0..5d1ae1ae 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -401,12 +401,14 @@ def napalm_configure_confirmed( def napalm_confirm_commit(task, prev_job_id: int = 0): """Confirm a previous pending configure session""" + logger = get_logger() n_device = task.host.get_connection("napalm", task.nornir.config) if isinstance(n_device, NapalmEOSDriver): n_device.config_session = "job{}".format(prev_job_id) n_device.confirm_commit() elif isinstance(n_device, NapalmJunOSDriver): n_device.confirm_commit() + logger.debug("Commit for job {} confirmed on device {}".format(prev_job_id, task.host.name)) def push_sync_device( From 0a99f4ff2f6791de5935531005915ae85bb84e02 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 30 Jan 2023 09:55:31 +0100 Subject: [PATCH 064/169] break out post sync update confighash to separate function, and make sure confirm_devices job also runs it --- src/cnaas_nms/devicehandler/sync_devices.py | 94 +++++++++++++-------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 5d1ae1ae..805c2f6b 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -361,6 +361,52 @@ def get_confirm_mode(confirm_mode_override: 
Optional[int] = None) -> int: return 1 +def post_sync_update_cofighash( + dry_run: bool, force: bool, nr_filtered: Nornir, unchanged_hosts: List, failed_hosts: List +): + """Update configuration hashes for device that were configured after sync has completed. + Args: + dry_run: bool + force: bool + nr_filtered: Nornir inventory of hosts to run on + unchanged_hosts: List of hosts that has not been changed, don't update confhosh + failed_hosts: List of hosts that failed with change, don't update confhash + """ + logger = get_logger() + nr_confighash = None + if dry_run and force: + # update config hash for devices that had an empty diff because local + # changes on a device can cause reordering of CLI commands that results + # in config hash mismatch even if the calculated diff was empty + def include_filter(host, include_list=unchanged_hosts): + if host.name in include_list: + return True + else: + return False + + nr_confighash = nr_filtered.filter(filter_func=include_filter) + elif not dry_run: + # set new config hash for devices that was successfully updated + def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): + if host.name in exclude_list: + return False + else: + return True + + nr_confighash = nr_filtered.filter(filter_func=exclude_filter) + + if nr_confighash: + try: + nrresult_confighash = nr_confighash.run(task=update_config_hash) + except Exception as e: + logger.exception("Exception while updating config hashes: {}".format(str(e))) + else: + if nrresult_confighash.failed: + logger.error( + "Unable to update some config hashes: {}".format(list(nrresult_confighash.failed_hosts.keys())) + ) + + def napalm_configure_confirmed( task, dry_run=None, @@ -691,13 +737,17 @@ def confirm_devices( if nrresult.failed: logger.error("Not all devices were successfully commit-confirmed") + post_sync_update_cofighash( + dry_run=False, force=False, nr_filtered=nr_filtered, unchanged_hosts=[], failed_hosts=failed_hosts + ) + with sqla_session() as 
session: for host, results in nrresult.items(): if host in failed_hosts or len(results) != 1: logger.debug("Setting device as unsync for failed commit-confirm on device {}".format(host)) dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = False - dev.last_seen = datetime.datetime.utcnow() + dev.confhash = None else: dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = True @@ -822,39 +872,13 @@ def sync_devices( change_scores.append(0) logger.debug("Empty diff for host {}, 0 change score".format(host)) - # break into separate function? - nr_confighash = None - if dry_run and force: - # update config hash for devices that had an empty diff because local - # changes on a device can cause reordering of CLI commands that results - # in config hash mismatch even if the calculated diff was empty - def include_filter(host, include_list=unchanged_hosts): - if host.name in include_list: - return True - else: - return False - - nr_confighash = nr_filtered.filter(filter_func=include_filter) - elif not dry_run: - # set new config hash for devices that was successfully updated - def exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): - if host.name in exclude_list: - return False - else: - return True - - nr_confighash = nr_filtered.filter(filter_func=exclude_filter) - - if nr_confighash: - try: - nrresult_confighash = nr_confighash.run(task=update_config_hash) - except Exception as e: - logger.exception("Exception while updating config hashes: {}".format(str(e))) - else: - if nrresult_confighash.failed: - logger.error( - "Unable to update some config hashes: {}".format(list(nrresult_confighash.failed_hosts.keys())) - ) + post_sync_update_cofighash( + dry_run=dry_run, + force=force, + nr_filtered=nr_filtered, + unchanged_hosts=unchanged_hosts, + failed_hosts=failed_hosts, + ) # set devices as synchronized if needed with sqla_session() as session: @@ -918,7 +942,7 @@ def 
exclude_filter(host, exclude_list=failed_hosts + unchanged_hosts): "cnaas_nms.devicehandler.sync_devices:confirm_devices", when=0, scheduled_by=scheduled_by, - kwargs={"prev_job_id": job_id, "hostnames": changed_hosts, "scheduled_by": scheduled_by}, + kwargs={"prev_job_id": job_id, "hostnames": changed_hosts}, ) logger.info(f"Commit-confirm for job id {job_id} scheduled as job id {next_job_id}") From a3085a5c6c74438ec6d16fb59541a7cef358d88b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Feb 2023 15:30:34 +0100 Subject: [PATCH 065/169] set thread data with jobid so logging for confirm task gets jobid prepended --- src/cnaas_nms/devicehandler/sync_devices.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 805c2f6b..97cac518 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -445,8 +445,9 @@ def napalm_configure_confirmed( return Result(host=task.host, diff=diff, changed=len(diff) > 0) -def napalm_confirm_commit(task, prev_job_id: int = 0): +def napalm_confirm_commit(task, job_id: int, prev_job_id: int): """Confirm a previous pending configure session""" + set_thread_data(job_id) logger = get_logger() n_device = task.host.get_connection("napalm", task.nornir.config) if isinstance(n_device, NapalmEOSDriver): @@ -716,7 +717,7 @@ def confirm_devices( logger.info("Device(s) selected for commit-confirm ({}): {}".format(dev_count, ", ".join(device_list))) try: - nrresult = nr_filtered.run(task=napalm_confirm_commit, prev_job_id=prev_job_id) + nrresult = nr_filtered.run(task=napalm_confirm_commit, job_id=job_id, prev_job_id=prev_job_id) except Exception as e: logger.exception("Exception while confirm-commit devices: {}".format(str(e))) try: From 3a415dd505ff62ead7b692f684d09927dba57fd5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Feb 2023 15:54:04 +0100 Subject: [PATCH 
066/169] don't schedule confirm job if failed_hosts is 1 or more devices in commitmode2, instead rollback all devices --- src/cnaas_nms/devicehandler/sync_devices.py | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 97cac518..fc1fdde6 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -873,13 +873,14 @@ def sync_devices( change_scores.append(0) logger.debug("Empty diff for host {}, 0 change score".format(host)) - post_sync_update_cofighash( - dry_run=dry_run, - force=force, - nr_filtered=nr_filtered, - unchanged_hosts=unchanged_hosts, - failed_hosts=failed_hosts, - ) + if get_confirm_mode(confirm_mode_override) != 2: + post_sync_update_cofighash( + dry_run=dry_run, + force=force, + nr_filtered=nr_filtered, + unchanged_hosts=unchanged_hosts, + failed_hosts=failed_hosts, + ) # set devices as synchronized if needed with sqla_session() as session: @@ -937,6 +938,14 @@ def sync_devices( logger.info("None of the selected host has any changes (diff), skipping commit-confirm") logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) + elif len(failed_hosts) > 0: + logger.error( + "No confirm job scheduled since one or more devices failed in commitmode 2" + ", all devices will rollback in {}s".format(api_settings.COMMIT_CONFIRMED_TIMEOUT) + ) + time.sleep(api_settings.COMMIT_CONFIRMED_TIMEOUT) + logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) + Joblock.release_lock(session, job_id=job_id) else: scheduler = Scheduler() next_job_id = scheduler.add_onetime_job( From 6ad4cc33daa6847a68971d1bb2155591ebb30891 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 3 Feb 2023 13:56:11 +0100 Subject: [PATCH 067/169] make confirm_commit task update finished_devices for job --- 
src/cnaas_nms/devicehandler/sync_devices.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index fc1fdde6..6f1e3083 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -456,6 +456,9 @@ def napalm_confirm_commit(task, job_id: int, prev_job_id: int): elif isinstance(n_device, NapalmJunOSDriver): n_device.confirm_commit() logger.debug("Commit for job {} confirmed on device {}".format(prev_job_id, task.host.name)) + if job_id: + with redis_session() as db: + db.lpush("finished_devices_" + str(job_id), task.host.name) def push_sync_device( From 3c7ba36a61b0fce994ba18ccaa3f62a7c6a78933 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 3 Feb 2023 14:55:21 +0100 Subject: [PATCH 068/169] add confirm_devices to progress functions to make sure finished_devices are updated --- src/cnaas_nms/scheduler/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/scheduler/wrapper.py b/src/cnaas_nms/scheduler/wrapper.py index e92e53d4..1e4aedbb 100644 --- a/src/cnaas_nms/scheduler/wrapper.py +++ b/src/cnaas_nms/scheduler/wrapper.py @@ -53,7 +53,7 @@ def wrapper(job_id: int, scheduled_by: str, kwargs={}): errmsg = "Missing job_id when starting job for {}".format(func.__name__) logger.error(errmsg) raise ValueError(errmsg) - progress_funcitons = ["sync_devices", "device_upgrade"] + progress_funcitons = ["sync_devices", "device_upgrade", "confirm_devices"] with sqla_session() as session: job = session.query(Job).filter(Job.id == job_id).one_or_none() if not job: From 3302ae47579293d1de7caae3261a452846208337 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 8 Feb 2023 13:45:01 +0100 Subject: [PATCH 069/169] allow json dumping of pathlib Path objects to string --- src/cnaas_nms/db/helper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cnaas_nms/db/helper.py 
b/src/cnaas_nms/db/helper.py index bfa5e2fa..ce52d75e 100644 --- a/src/cnaas_nms/db/helper.py +++ b/src/cnaas_nms/db/helper.py @@ -1,5 +1,6 @@ import datetime from ipaddress import IPv4Address, IPv4Interface +from pathlib import Path from typing import List, Optional import netaddr @@ -147,3 +148,5 @@ def get_all_mgmtdomains(session, hostname: str) -> List[Mgmtdomain]: def json_dumper(obj): if isinstance(obj, datetime.datetime): return obj.isoformat() + if isinstance(obj, Path): + return str(obj) From ddbf01d4a15fd0cc53c8f0e4d095a18e17a9ac46 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 8 Feb 2023 13:46:06 +0100 Subject: [PATCH 070/169] move swagger docs to /api/doc/ so everything is under /api path, which makes it easier when using a reverse-proxy in front etc like cnaas-front --- src/cnaas_nms/api/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/api/app.py b/src/cnaas_nms/api/app.py index e0d23a17..45f3dc00 100644 --- a/src/cnaas_nms/api/app.py +++ b/src/cnaas_nms/api/app.py @@ -101,7 +101,9 @@ def handle_error(self, e): jwt = JWTManager(app) -api = CnaasApi(app, prefix="/api/{}".format(__api_version__), authorizations=authorizations, security="apikey") +api = CnaasApi( + app, prefix="/api/{}".format(__api_version__), authorizations=authorizations, security="apikey", doc="/api/doc/" +) api.add_namespace(device_api) api.add_namespace(devices_api) From 7090d1d7a9a900662b497cb2c11252634d41749b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 8 Feb 2023 13:47:11 +0100 Subject: [PATCH 071/169] add /settings/server path to get api server settings --- src/cnaas_nms/api/settings.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/cnaas_nms/api/settings.py b/src/cnaas_nms/api/settings.py index 712866ed..f0d2ad11 100644 --- a/src/cnaas_nms/api/settings.py +++ b/src/cnaas_nms/api/settings.py @@ -1,8 +1,12 @@ +import json + from flask import make_response, request from flask_restx import 
Namespace, Resource from cnaas_nms.api.generic import empty_result +from cnaas_nms.app_settings import api_settings from cnaas_nms.db.device import Device, DeviceType +from cnaas_nms.db.helper import json_dumper from cnaas_nms.db.session import sqla_session from cnaas_nms.db.settings import SettingsSyntaxError, check_settings_syntax, get_settings, get_settings_root from cnaas_nms.tools.mergedict import merge_dict_origin @@ -68,5 +72,15 @@ def post(self): return empty_result(status="success", data=ret) +class SettingsServerApI(Resource): + @jwt_required + def get(self): + ret_dict = {"api": api_settings.dict()} + response = make_response(json.dumps(ret_dict, default=json_dumper)) + response.headers["Content-Type"] = "application/json" + return response + + api.add_resource(SettingsApi, "") api.add_resource(SettingsModelApi, "/model") +api.add_resource(SettingsServerApI, "/server") From 0b391a1379a8529a3d4ac8f3f15f06e5e0247fbd Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 8 Feb 2023 13:47:48 +0100 Subject: [PATCH 072/169] docs example for /settings/server --- docs/apiref/settings.rst | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/apiref/settings.rst b/docs/apiref/settings.rst index 67602b71..9fcd9b2c 100644 --- a/docs/apiref/settings.rst +++ b/docs/apiref/settings.rst @@ -92,3 +92,40 @@ Output: "status": "error", "message": "Validation error for setting radius_servers->0->host, bad value: 10.0.0.500 (value origin: API POST data)\nMessage: string does not match regex \"^((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|([a-z0-9-]{1,63}\\.)([a-z-][a-z0-9-]{1,62}\\.?)+)$\", field should be: Hostname, FQDN or IP address\n" } + + +Server settings: +---------------- + +To get configuration settings for the API server, send a GET request to: + +:: + + curl https://hostname/api/v1.0/settings/server + +Output will very depending on version of NMS running, example: + +:: + + { 
+ "api": { + "HOST": "172.30.0.1", + "HTTPD_URL": "https://cnaas_httpd:1443/api/v1.0/firmware", + "VERIFY_TLS": true, + "VERIFY_TLS_DEVICE": false, + "JWT_CERT": "/etc/cnaas-nms/public.pem", + "CAFILE": "/etc/cnaas-nms/certs/rootCA.crt", + "CAKEYFILE": "/etc/cnaas-nms/certs/rootCA.key", + "CERTPATH": "/etc/cnaas-nms/certs", + "ALLOW_APPLY_CONFIG_LIVERUN": false, + "FIRMWARE_URL": "https://cnaas_httpd:1443/api/v1.0/firmware", + "JWT_ENABLED": true, + "PLUGIN_FILE": "/etc/cnaas-nms/plugins.yml", + "GLOBAL_UNIQUE_VLANS": true, + "INIT_MGMT_TIMEOUT": 30, + "MGMTDOMAIN_RESERVED_COUNT": 5, + "COMMIT_CONFIRMED_MODE": 2, + "COMMIT_CONFIRMED_TIMEOUT": 300, + "SETTINGS_OVERRIDE": null + } + } From ee6748650203df21faa265df2ba8d3562b9e6048 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 8 Feb 2023 14:29:41 +0100 Subject: [PATCH 073/169] convert api test_settings to pytest --- src/cnaas_nms/api/tests/test_settings.py | 58 +++++++++++++++--------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/src/cnaas_nms/api/tests/test_settings.py b/src/cnaas_nms/api/tests/test_settings.py index 23ae04c7..c89ba613 100644 --- a/src/cnaas_nms/api/tests/test_settings.py +++ b/src/cnaas_nms/api/tests/test_settings.py @@ -1,35 +1,49 @@ import os -import unittest import pkg_resources +import pytest import yaml +from flask.testing import FlaskClient from cnaas_nms.api import app from cnaas_nms.api.tests.app_wrapper import TestAppWrapper -class SettingsTests(unittest.TestCase): - def setUp(self): - self.jwt_auth_token = None - data_dir = pkg_resources.resource_filename(__name__, "data") - with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: - self.testdata = yaml.safe_load(f_testdata) - if "jwt_auth_token" in self.testdata: - self.jwt_auth_token = self.testdata["jwt_auth_token"] - self.app = app.app - self.app.wsgi_app = TestAppWrapper(self.app.wsgi_app, self.jwt_auth_token) - self.client = self.app.test_client() +@pytest.fixture +def 
testdata(scope="module") -> dict: + data_dir = pkg_resources.resource_filename(__name__, "data") + with open(os.path.join(data_dir, "testdata.yml"), "r") as f_testdata: + return yaml.safe_load(f_testdata) - def test_invalid_setting(self): - settings_data = {"ntp_servers": [{"host": "10.0.0.500"}]} - result = self.client.post("/api/v1.0/settings/model", json=settings_data) - self.assertEqual(result.status_code, 400) - def test_valid_setting(self): - settings_data = {"ntp_servers": [{"host": "10.0.0.50"}]} - result = self.client.post("/api/v1.0/settings/model", json=settings_data) - self.assertEqual(result.status_code, 200) +@pytest.fixture +def testclient(testdata: dict, scope="module") -> FlaskClient: + nms_app = app.app + nms_app.wsgi_app = TestAppWrapper(nms_app.wsgi_app, testdata["jwt_auth_token"]) + return nms_app.test_client() -if __name__ == "__main__": - unittest.main() +def test_invalid_setting(testclient: FlaskClient): + settings_data = {"ntp_servers": [{"host": "10.0.0.500"}]} + result = testclient.post("/api/v1.0/settings/model", json=settings_data) + assert result.status_code == 400 + + +def test_valid_setting(testclient: FlaskClient): + settings_data = {"ntp_servers": [{"host": "10.0.0.50"}]} + result = testclient.post("/api/v1.0/settings/model", json=settings_data) + assert result.status_code == 200 + + +def test_settings_model(testclient: FlaskClient): + result = testclient.get("/api/v1.0/settings/model") + assert result.status_code == 200 + assert result.content_type == "application/json" + assert "definitions" in result.json + + +def test_settings_server(testclient: FlaskClient): + result = testclient.get("/api/v1.0/settings/server") + assert result.status_code == 200 + assert result.content_type == "application/json" + assert "api" in result.json From 124e0a81c35cf1b53fb09898453386e3e45f3fce Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Feb 2023 17:40:13 +0100 Subject: [PATCH 074/169] expand interface ranges in get 
verify_peer_device so init works on interface range ports --- src/cnaas_nms/devicehandler/get.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/get.py b/src/cnaas_nms/devicehandler/get.py index d9629538..237f5d1a 100644 --- a/src/cnaas_nms/devicehandler/get.py +++ b/src/cnaas_nms/devicehandler/get.py @@ -9,6 +9,7 @@ import cnaas_nms.devicehandler.nornir_helper from cnaas_nms.db.device import Device, DeviceType +from cnaas_nms.db.device_vars import expand_interface_settings from cnaas_nms.db.interface import Interface, InterfaceConfigType, InterfaceError from cnaas_nms.tools.log import get_logger @@ -241,7 +242,7 @@ def verify_peer_iftype( # Make sure interface with peers are configured in settings for CORE and DIST devices if remote_dev.device_type in [DeviceType.DIST, DeviceType.CORE]: match = False - for intf in remote_device_settings["interfaces"]: + for intf in expand_interface_settings(remote_device_settings["interfaces"]): if intf["name"] == remote_if: match = True if not match: @@ -250,7 +251,7 @@ def verify_peer_iftype( ) if local_dev.device_type in [DeviceType.DIST, DeviceType.CORE]: match = False - for intf in local_device_settings["interfaces"]: + for intf in expand_interface_settings(local_device_settings["interfaces"]): if intf["name"] == local_if: match = True if not match: @@ -263,13 +264,13 @@ def verify_peer_iftype( DeviceType.DIST, DeviceType.CORE, ]: - for intf in local_device_settings["interfaces"]: + for intf in expand_interface_settings(local_device_settings["interfaces"]): if intf["name"] == local_if and intf["ifclass"] != "fabric": raise InterfaceError( "Local device interface is not configured as fabric: " "{} {} ifclass: {}".format(local_dev.hostname, intf["name"], intf["ifclass"]) ) - for intf in remote_device_settings["interfaces"]: + for intf in expand_interface_settings(remote_device_settings["interfaces"]): if intf["name"] == remote_if and intf["ifclass"] != "fabric": raise 
InterfaceError( "Peer device interface is not configured as fabric: " @@ -279,7 +280,7 @@ def verify_peer_iftype( # Make sure that an access switch is connected to an interface # configured as "downlink" on the remote end if local_dev.device_type == DeviceType.ACCESS and remote_dev.device_type == DeviceType.DIST: - for intf in remote_device_settings["interfaces"]: + for intf in expand_interface_settings(remote_device_settings["interfaces"]): if intf["name"] == remote_if and intf["ifclass"] != "downlink": raise InterfaceError( "Peer device interface is not configured as downlink: " From eee258f30196923829f10bfa0ce754da7803b5fd Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Feb 2023 18:12:54 +0100 Subject: [PATCH 075/169] allow setting interface data untagged_vlan to null --- src/cnaas_nms/api/interface.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/api/interface.py b/src/cnaas_nms/api/interface.py index cbbb0a84..75f21f38 100644 --- a/src/cnaas_nms/api/interface.py +++ b/src/cnaas_nms/api/interface.py @@ -102,15 +102,19 @@ def put(self, hostname): "Specified VXLAN {} is not present in {}".format(if_dict["data"]["vxlan"], hostname) ) if "untagged_vlan" in if_dict["data"]: - vlan_id = resolve_vlanid(if_dict["data"]["untagged_vlan"], device_settings["vxlans"]) - if vlan_id: - intfdata["untagged_vlan"] = if_dict["data"]["untagged_vlan"] + if if_dict["data"]["untagged_vlan"] is None: + if "untagged_vlan" in intfdata: + del intfdata["untagged_vlan"] else: - errors.append( - "Specified VLAN name {} is not present in {}".format( - if_dict["data"]["untagged_vlan"], hostname + vlan_id = resolve_vlanid(if_dict["data"]["untagged_vlan"], device_settings["vxlans"]) + if vlan_id: + intfdata["untagged_vlan"] = if_dict["data"]["untagged_vlan"] + else: + errors.append( + "Specified VLAN name {} is not present in {}".format( + if_dict["data"]["untagged_vlan"], hostname + ) ) - ) if "tagged_vlan_list" in 
if_dict["data"]: if isinstance(if_dict["data"]["tagged_vlan_list"], list): vlan_id_list = resolve_vlanid_list( From cfe4c9a62db46f6c2553a254a54cf7bd5f2ca2c3 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Feb 2023 18:31:07 +0100 Subject: [PATCH 076/169] don't return duplicates of neighbors in get_neighbors() --- src/cnaas_nms/db/device.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 76c48029..b3fc7c81 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -112,11 +112,11 @@ def as_dict(self) -> dict: d[col.name] = value return d - def get_neighbors(self, session, linknets: Optional[List[dict]] = None) -> List[Device]: + def get_neighbors(self, session, linknets: Optional[List[dict]] = None) -> Set[Device]: """Look up neighbors from cnaas_nms.db.linknet.Linknets and return them as a list of Device objects.""" if not linknets: linknets = self.get_linknets(session) - ret = [] + ret: Set = set() for linknet in linknets: if isinstance(linknet, cnaas_nms.db.linknet.Linknet): device_a_id = linknet.device_a_id @@ -125,9 +125,9 @@ def get_neighbors(self, session, linknets: Optional[List[dict]] = None) -> List[ device_a_id = linknet["device_a_id"] device_b_id = linknet["device_b_id"] if device_a_id == self.id: - ret.append(session.query(Device).filter(Device.id == device_b_id).one()) + ret.add(session.query(Device).filter(Device.id == device_b_id).one()) else: - ret.append(session.query(Device).filter(Device.id == device_a_id).one()) + ret.add(session.query(Device).filter(Device.id == device_a_id).one()) return ret def get_linknets(self, session) -> List[cnaas_nms.db.linknet.Linknet]: From d8ed9e6387e0e4dae144680f1f77f9459a9a9c52 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Feb 2023 19:04:04 +0100 Subject: [PATCH 077/169] make sure we compare to set in unittest --- src/cnaas_nms/db/tests/test_device.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/db/tests/test_device.py b/src/cnaas_nms/db/tests/test_device.py index b658e1ec..66ef49df 100644 --- a/src/cnaas_nms/db/tests/test_device.py +++ b/src/cnaas_nms/db/tests/test_device.py @@ -81,8 +81,8 @@ def test_get_neighbors(self): Linknet(device_a=device1, device_b=device2) device1 = session.query(Device).filter(Device.hostname == "test-device1").one() device2 = session.query(Device).filter(Device.hostname == "test-device2").one() - self.assertEqual([device2], device1.get_neighbors(session)) - self.assertEqual([device1], device2.get_neighbors(session)) + self.assertEqual(set([device2]), device1.get_neighbors(session)) + self.assertEqual(set([device1]), device2.get_neighbors(session)) def test_is_stack(self): with sqla_session() as session: From edd9f7f515f7fe9a5fd0277adf245a8685217282 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 28 Feb 2023 09:32:28 +0100 Subject: [PATCH 078/169] try allowing capital letters in hostname and domain for settings --- src/cnaas_nms/db/settings_fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 2aac0b1e..3d344345 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -18,10 +18,10 @@ r"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" # 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8 r":((:[0-9a-fA-F]{1,4}){1,7}|:))" ) -FQDN_REGEX = r"([a-z0-9-]{1,63}\.)([a-z-][a-z0-9-]{1,62}\.?)+" +FQDN_REGEX = r"([a-zA-Z0-9-]{1,63}\.)([a-z-][a-z0-9-]{1,62}\.?)+" HOST_REGEX = f"^({IPV4_REGEX}|{IPV6_REGEX}|{FQDN_REGEX})$" -HOSTNAME_REGEX = r"^([a-z0-9-]{1,63})(\.[a-z0-9-]{1,63})*$" -DOMAIN_NAME_REGEX = r"^([a-z0-9-]{1,63})(\.[a-z0-9-]{1,63})+$" +HOSTNAME_REGEX = r"^([a-zA-Z0-9-]{1,63})(\.[a-z0-9-]{1,63})*$" +DOMAIN_NAME_REGEX = r"^([a-zA-Z0-9-]{1,63})(\.[a-z0-9-]{1,63})+$" host_schema = Field(..., regex=HOST_REGEX, max_length=253, description="Hostname, 
FQDN or IP address") hostname_schema = Field(..., regex=HOSTNAME_REGEX, max_length=253, description="Hostname or FQDN") domain_name_schema = Field(None, regex=DOMAIN_NAME_REGEX, max_length=251, description="DNS domain name") From 1c9251c017d15ff34be41c3ddaf38c43de239311 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Mar 2023 09:00:07 +0100 Subject: [PATCH 079/169] allow group names with - in name --- src/cnaas_nms/db/settings_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 3d344345..cb1a9d34 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -58,7 +58,7 @@ ebgp_multihop_schema = Field(None, ge=1, le=255, description="Numeric IP TTL, 1-255") maximum_routes_schema = Field(None, ge=0, le=4294967294, description="Maximum number of routes to receive from peer") -GROUP_NAME = r"^([a-zA-Z0-9_]{1,63}\.?)+$" +GROUP_NAME = r"^([a-zA-Z0-9_-]{1,63}\.?)+$" group_name = Field(..., regex=GROUP_NAME, max_length=253) group_priority_schema = Field( 0, ge=0, le=100, description="Group priority 0-100, default 0, higher value means higher priority" From adde8780af9f21845852c9da187438e4cab196dd Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 2 Mar 2023 12:57:57 +0100 Subject: [PATCH 080/169] Support branch refspecs also for dhcpd image The dhcpd docker image clones GITREPO_ETC using a shell script, and consequently did not support a branch refspec anchor, as is supported by the Python codebase of CNaaS-NMS. This adds refspec support to GITREPO_ETC by splitting off the anchor using shell commands available in the docker image (specifically, `cut`). --- docker/dhcpd/dhcpd.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/dhcpd/dhcpd.sh b/docker/dhcpd/dhcpd.sh index e2f1f964..5a0518bd 100755 --- a/docker/dhcpd/dhcpd.sh +++ b/docker/dhcpd/dhcpd.sh @@ -6,7 +6,12 @@ if [ ! 
-z "$GITREPO_ETC" ] then cd /opt/cnaas rm -rf /opt/cnaas/etc - git clone $GITREPO_ETC etc + base_url=$(echo $GITREPO_ETC | cut -d\# -f1) + branch=$(echo $GITREPO_ETC | cut -d\# -s -f2) + if [ -n "$branch" ]; then + branch="-b $branch" + fi + git clone $branch $base_url etc if [ -f "/opt/cnaas/etc/dhcpd/dhcpd.conf" ] then cp /opt/cnaas/etc/dhcpd/dhcpd.conf /opt/cnaas/dhcpd.conf From cdd0f3b3a46d34de83f1e021f81712378563b4d5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 21 Mar 2023 15:46:49 +0100 Subject: [PATCH 081/169] include ssh-client in api container, useful for git clone via ssh --- docker/api/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/api/Dockerfile b/docker/api/Dockerfile index 3b1cd4a7..07ae6875 100644 --- a/docker/api/Dockerfile +++ b/docker/api/Dockerfile @@ -29,6 +29,7 @@ RUN apt-get update \ psmisc \ supervisor \ uwsgi-plugin-python3 \ + ssh-client \ && pip3 install --no-cache-dir uwsgi # Prepare for supervisord, ngninx From 99875dbaa2f0f44fe19b7ba451a897e6dfd13c8c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 28 Feb 2023 09:32:28 +0100 Subject: [PATCH 082/169] try allowing capital letters in hostname and domain for settings --- src/cnaas_nms/db/settings_fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 2aac0b1e..3d344345 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -18,10 +18,10 @@ r"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" # 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8 r":((:[0-9a-fA-F]{1,4}){1,7}|:))" ) -FQDN_REGEX = r"([a-z0-9-]{1,63}\.)([a-z-][a-z0-9-]{1,62}\.?)+" +FQDN_REGEX = r"([a-zA-Z0-9-]{1,63}\.)([a-z-][a-z0-9-]{1,62}\.?)+" HOST_REGEX = f"^({IPV4_REGEX}|{IPV6_REGEX}|{FQDN_REGEX})$" -HOSTNAME_REGEX = r"^([a-z0-9-]{1,63})(\.[a-z0-9-]{1,63})*$" -DOMAIN_NAME_REGEX = r"^([a-z0-9-]{1,63})(\.[a-z0-9-]{1,63})+$" +HOSTNAME_REGEX = 
r"^([a-zA-Z0-9-]{1,63})(\.[a-z0-9-]{1,63})*$" +DOMAIN_NAME_REGEX = r"^([a-zA-Z0-9-]{1,63})(\.[a-z0-9-]{1,63})+$" host_schema = Field(..., regex=HOST_REGEX, max_length=253, description="Hostname, FQDN or IP address") hostname_schema = Field(..., regex=HOSTNAME_REGEX, max_length=253, description="Hostname or FQDN") domain_name_schema = Field(None, regex=DOMAIN_NAME_REGEX, max_length=251, description="DNS domain name") From c668833664d497af94c63a62aec0e9806414e8d1 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Mar 2023 09:00:07 +0100 Subject: [PATCH 083/169] allow group names with - in name --- src/cnaas_nms/db/settings_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 3d344345..cb1a9d34 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -58,7 +58,7 @@ ebgp_multihop_schema = Field(None, ge=1, le=255, description="Numeric IP TTL, 1-255") maximum_routes_schema = Field(None, ge=0, le=4294967294, description="Maximum number of routes to receive from peer") -GROUP_NAME = r"^([a-zA-Z0-9_]{1,63}\.?)+$" +GROUP_NAME = r"^([a-zA-Z0-9_-]{1,63}\.?)+$" group_name = Field(..., regex=GROUP_NAME, max_length=253) group_priority_schema = Field( 0, ge=0, le=100, description="Group priority 0-100, default 0, higher value means higher priority" From b660267ad0f831ccdde8342c5580ac862914f462 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Mar 2023 09:37:34 +0100 Subject: [PATCH 084/169] security fixes for werkzeug --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bd42d03b..52769bf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,5 +31,5 @@ SQLAlchemy==1.4.41 sqlalchemy-stubs==0.4 SQLAlchemy-Utils==0.38.3 pydantic==1.10.2 -Werkzeug==2.1.2 +Werkzeug==2.2.3 greenlet==1.1.3 From d930ff4351eb097ee66f8441b4740abf8a89b422 Mon Sep 17 
00:00:00 2001 From: Johan Marcusson Date: Wed, 1 Mar 2023 10:32:38 +0100 Subject: [PATCH 085/169] update flask-restx to work with security fixed werkzeug --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 52769bf2..0522b06a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ APScheduler==3.9.1 coverage==6.4.4 Flask-Cors==3.0.10 Flask-JWT-Extended==4.4.4 -flask-restx==0.5.1 +flask-restx==1.0.5 flask==2.1.2 Flask-SocketIO==5.3.1 gevent==21.12.0 From 62f01d5ecd7842fbeb0fa10c5a1aa17bca737ad0 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Mar 2023 09:08:31 +0100 Subject: [PATCH 086/169] Add changelog for 1.4.1 --- docs/changelog/index.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index 5241e43c..ca7941d1 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -1,6 +1,15 @@ Changelog ========= +Version 1.4.1 +------------- + +Bug fixes: + + - Fixed interface range expansion logic for downlink ports during init + - Allow setting of untagged_vlan to null in interfaces API (#290) + - Fix duplicate generation of underlay BGP peers (#291) + Version 1.4.0 ------------- From 5a780f53f0fd429d338ed46da694d4983b277aad Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 22 Mar 2023 09:41:17 +0100 Subject: [PATCH 087/169] make dev docker-compose only bind to the br-cnaas bridge ip --- docker/docker-compose.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 9b2a5644..f2a94035 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -116,9 +116,10 @@ networks: name: cnaas ipam: config: - - subnet: 172.30.0.0/24 + - subnet: 172.30.0.0/24 driver_opts: com.docker.network.bridge.name: br-cnaas + com.docker.network.bridge.host_binding_ipv4: "172.30.0.1" volumes: cnaas-templates: From 
5bdc08cec8f3b6339022591926741724facdef3b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 29 Mar 2023 09:51:22 +0200 Subject: [PATCH 088/169] Revert "make dev docker-compose only bind to the br-cnaas bridge ip" causes issues with dhcpd etc in my lab This reverts commit 5a780f53f0fd429d338ed46da694d4983b277aad. --- docker/docker-compose.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index f2a94035..9b2a5644 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -116,10 +116,9 @@ networks: name: cnaas ipam: config: - - subnet: 172.30.0.0/24 + - subnet: 172.30.0.0/24 driver_opts: com.docker.network.bridge.name: br-cnaas - com.docker.network.bridge.host_binding_ipv4: "172.30.0.1" volumes: cnaas-templates: From 8cbd098f027d96de8149e32d9a4ea5b168342409 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 30 Mar 2023 14:25:42 +0200 Subject: [PATCH 089/169] this command must have been left uncommented after some cleanup in 6740188c814985fb7059faa6e05e2f484cd579e3 I guess --- test/integrationtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integrationtests.sh b/test/integrationtests.sh index 93201e14..1ae0f319 100755 --- a/test/integrationtests.sh +++ b/test/integrationtests.sh @@ -110,7 +110,7 @@ echo "Gathering coverage reports from integration tests:" MULE_PID="`docker logs docker_cnaas_api_1 | awk '/spawned uWSGI mule/{print $6}' | egrep -o "[0-9]+" | tail -n1`" echo "Found mule at pid $MULE_PID" # Allow for code coverage files to be saved -#docker-compose exec -u root -T cnaas_api chown -R www-data:www-data /opt/cnaas/venv/cnaas-nms/src/ +docker-compose exec -u root -T cnaas_api chown -R www-data:www-data /opt/cnaas/venv/cnaas-nms/src/ curl -ks -H "Authorization: Bearer $JWT_AUTH_TOKEN" "https://localhost/api/v1.0/system/shutdown" -d "{}" -X POST -H "Content-Type: application/json" sleep 3 From 
b91164c6544296e32e299a1317b20f2755d5ea7a Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 26 Oct 2022 13:57:59 +0200 Subject: [PATCH 090/169] Add ipv6_gw field to mgmtdomain --- ...ee221a7_add_ipv6_gw_field_to_mgmtdomain.py | 24 +++++++++++++++++++ src/cnaas_nms/db/mgmtdomain.py | 1 + 2 files changed, 25 insertions(+) create mode 100644 alembic/versions/2f9faee221a7_add_ipv6_gw_field_to_mgmtdomain.py diff --git a/alembic/versions/2f9faee221a7_add_ipv6_gw_field_to_mgmtdomain.py b/alembic/versions/2f9faee221a7_add_ipv6_gw_field_to_mgmtdomain.py new file mode 100644 index 00000000..03ca7c1a --- /dev/null +++ b/alembic/versions/2f9faee221a7_add_ipv6_gw_field_to_mgmtdomain.py @@ -0,0 +1,24 @@ +"""Add ipv6_gw field to Mgmtdomain + +Revision ID: 2f9faee221a7 +Revises: b7629362583c +Create Date: 2022-10-26 13:52:12.466111 + +""" +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "2f9faee221a7" +down_revision = "b7629362583c" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column("mgmtdomain", sa.Column("ipv6_gw", sa.Unicode(43))) + + +def downgrade(): + op.drop_column("mgmtdomain", "ipv6_gw") diff --git a/src/cnaas_nms/db/mgmtdomain.py b/src/cnaas_nms/db/mgmtdomain.py index ed8e4155..156490bb 100644 --- a/src/cnaas_nms/db/mgmtdomain.py +++ b/src/cnaas_nms/db/mgmtdomain.py @@ -25,6 +25,7 @@ class Mgmtdomain(cnaas_nms.db.base.Base): ) id = Column(Integer, autoincrement=True, primary_key=True) ipv4_gw = Column(Unicode(18)) # 255.255.255.255/32 + ipv6_gw = Column(Unicode(43)) # fe80:0000:0000:0000:0000:0000:0000:0000/128 device_a_id = Column(Integer, ForeignKey("device.id")) device_a = relationship("Device", foreign_keys=[device_a_id]) device_a_ip = Column(IPAddressType) From d1a545d1385d41666bd7a1d50ee1f017b966eaef Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 26 Oct 2022 15:51:06 +0200 Subject: [PATCH 091/169] Accept ipv6_gw argument in mgmtdomain APIs Valid IPv6 interface 
addresses must be accepted by the mgmtdomain API endpoints. Additionally, an mgmtdomain can be either IPv4, IPv6 or both, meaning that there are now multiple sets of minimally required attributes to post to the endpoint. This should ensure backwards compatibility with API clients that only support ipv4_gw --- src/cnaas_nms/api/mgmtdomain.py | 40 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/api/mgmtdomain.py b/src/cnaas_nms/api/mgmtdomain.py index f64d468e..0bf6bf56 100644 --- a/src/cnaas_nms/api/mgmtdomain.py +++ b/src/cnaas_nms/api/mgmtdomain.py @@ -1,4 +1,4 @@ -from ipaddress import IPv4Interface +from ipaddress import IPv4Interface, IPv6Interface from typing import Optional from flask import request @@ -29,6 +29,7 @@ "device_b": fields.String(required=True), "vlan": fields.Integer(required=True), "ipv4_gw": fields.String(required=True), + "ipv6_gw": fields.String(required=True), "description": fields.String(required=False), }, ) @@ -37,6 +38,7 @@ class f_mgmtdomain(BaseModel): vlan: Optional[int] = vlan_id_schema_optional ipv4_gw: Optional[str] = None + ipv6_gw: Optional[str] = None description: Optional[str] = None @validator("ipv4_gw") @@ -56,6 +58,24 @@ def ipv4_gw_valid_address(cls, v, values, **kwargs): return v + @validator("ipv6_gw") + @classmethod + def ipv6_gw_valid_address(cls, v, values, **kwargs): + try: + addr = IPv6Interface(v) + prefix_len = int(addr.network.prefixlen) + except Exception: # noqa: S110 + raise ValueError("Invalid ipv6_gw received. 
Must be correct IPv6 address with mask") + else: + if addr.ip == addr.network.network_address: + raise ValueError("Specify gateway address, not subnet address") + if addr.ip == addr.network.broadcast_address: + raise ValueError("Specify gateway address, not broadcast address") + if prefix_len >= 126 or prefix_len <= 63: + raise ValueError("Bad prefix length {} for management network".format(prefix_len)) + + return v + class MgmtdomainByIdApi(Resource): @jwt_required @@ -163,12 +183,20 @@ def post(self): except ValidationError as e: errors += parse_pydantic_error(e, f_mgmtdomain, json_data) - required_keys = ["device_a", "device_b", "vlan", "ipv4_gw"] - if all([key in data for key in required_keys]) and all([key in json_data for key in required_keys]): + required_keys_1 = ["device_a", "device_b", "vlan", "ipv4_gw"] + required_keys_2 = ["device_a", "device_b", "vlan", "ipv6_gw"] + required_in_data = all(key in data for key in required_keys_1) or all( + key in data for key in required_keys_2 + ) + required_in_json_data = all(key in json_data for key in required_keys_1) or all( + key in json_data for key in required_keys_2 + ) + if required_in_data and required_in_json_data: new_mgmtd = Mgmtdomain() new_mgmtd.device_a = data["device_a"] new_mgmtd.device_b = data["device_b"] new_mgmtd.ipv4_gw = data["ipv4_gw"] + new_mgmtd.ipv6_gw = data["ipv6_gw"] new_mgmtd.vlan = data["vlan"] try: session.add(new_mgmtd) @@ -184,7 +212,11 @@ def post(self): device_b.synchronized = False return empty_result(status="success", data={"added_mgmtdomain": new_mgmtd.as_dict()}), 200 else: - errors.append("Not all required inputs were found: {}".format(", ".join(required_keys))) + errors.append( + "Not all required inputs were found: {} OR {}".format( + ", ".join(required_keys_1), ", ".join(required_keys_2) + ) + ) return empty_result("error", errors), 400 From ce1d4c38e5e6a156748ee23e3965f9234a7902e1 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 27 Oct 2022 12:16:55 +0200 
Subject: [PATCH 092/169] Accept IPv6 addresses on all Device IP attrs --- src/cnaas_nms/db/device.py | 14 +++++++------- src/cnaas_nms/devicehandler/init_device.py | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index b3fc7c81..1522ee9b 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -105,7 +105,7 @@ def as_dict(self) -> dict: value = value.name elif issubclass(value.__class__, cnaas_nms.db.base.Base): continue - elif issubclass(value.__class__, ipaddress.IPv4Address): + elif issubclass(value.__class__, ipaddress._BaseAddress): value = str(value) elif issubclass(value.__class__, datetime.datetime): value = str(value) @@ -379,9 +379,9 @@ def validate(cls, new_entry=True, **kwargs): if "management_ip" in kwargs: if kwargs["management_ip"]: try: - addr = ipaddress.IPv4Address(kwargs["management_ip"]) + addr = ipaddress.ip_address(kwargs["management_ip"]) except Exception: - errors.append("Invalid management_ip received. Must be correct IPv4 address.") + errors.append("Invalid management_ip received. Must be a valid IP address.") else: data["management_ip"] = addr else: @@ -390,9 +390,9 @@ def validate(cls, new_entry=True, **kwargs): if "infra_ip" in kwargs: if kwargs["infra_ip"]: try: - addr = ipaddress.IPv4Address(kwargs["infra_ip"]) + addr = ipaddress.ip_address(kwargs["infra_ip"]) except Exception: - errors.append("Invalid infra_ip received. Must be correct IPv4 address.") + errors.append("Invalid infra_ip received. Must be valid IP address.") else: data["infra_ip"] = addr else: @@ -401,9 +401,9 @@ def validate(cls, new_entry=True, **kwargs): if "dhcp_ip" in kwargs: if kwargs["dhcp_ip"]: try: - addr = ipaddress.IPv4Address(kwargs["dhcp_ip"]) + addr = ipaddress.ip_address(kwargs["dhcp_ip"]) except Exception: - errors.append("Invalid dhcp_ip received. Must be correct IPv4 address.") + errors.append("Invalid dhcp_ip received. 
Must be valid IP address.") else: data["dhcp_ip"] = addr else: diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index a4e9eb14..54731145 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -1,6 +1,6 @@ import datetime import os -from ipaddress import IPv4Address, IPv4Interface +from ipaddress import IPv4Address, IPv4Interface, ip_interface from typing import List, Optional, Union import yaml @@ -507,9 +507,9 @@ def init_access_device_step1( session.add(reserved_ip) session.commit() # Populate variables for template rendering - mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw) + mgmt_gw_ipif = ip_interface(mgmtdomain.ipv6_gw or mgmtdomain.ipv4_gw) mgmt_variables = { - "mgmt_ipif": str(IPv4Interface("{}/{}".format(mgmt_ip, mgmt_gw_ipif.network.prefixlen))), + "mgmt_ipif": str(ip_interface("{}/{}".format(mgmt_ip, mgmt_gw_ipif.network.prefixlen))), "mgmt_ip": str(mgmt_ip), "mgmt_prefixlen": int(mgmt_gw_ipif.network.prefixlen), "mgmt_vlan_id": mgmtdomain.vlan, From 4d885670826cf487a01c33b2d20de66ae5f686bb Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Mon, 5 Dec 2022 09:59:27 +0100 Subject: [PATCH 093/169] Add IPv6 to Mgmtdomain.find_free_mgmt_ip The new version of this function can take the optional version argument to select whether to get an address from the IPv4 or the IPv6 network of the mgmtdomain. 
--- src/cnaas_nms/db/mgmtdomain.py | 24 +++++++++++++++++------ src/cnaas_nms/db/tests/test_mgmtdomain.py | 11 +++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/cnaas_nms/db/mgmtdomain.py b/src/cnaas_nms/db/mgmtdomain.py index 156490bb..e6f7c893 100644 --- a/src/cnaas_nms/db/mgmtdomain.py +++ b/src/cnaas_nms/db/mgmtdomain.py @@ -1,9 +1,9 @@ import datetime import enum import ipaddress -from ipaddress import IPv4Address, IPv4Interface +from ipaddress import IPv4Address, IPv6Address, ip_interface from itertools import dropwhile, islice -from typing import Optional, Set +from typing import Optional, Set, Union from sqlalchemy import Column, ForeignKey, Integer, String, Unicode, UniqueConstraint from sqlalchemy.orm import load_only, relationship @@ -16,6 +16,8 @@ from cnaas_nms.db.device import Device from cnaas_nms.db.reservedip import ReservedIP +IPAddress = Union[IPv4Address, IPv6Address] + class Mgmtdomain(cnaas_nms.db.base.Base): __tablename__ = "mgmtdomain" @@ -59,20 +61,30 @@ def as_dict(self): pass return d - def find_free_mgmt_ip(self, session) -> Optional[IPv4Address]: - """Return first available IPv4 address from this Mgmtdomain's ipv4_gw network.""" + def find_free_mgmt_ip(self, session, version: int = 4) -> Optional[IPAddress]: + """Returns the first available IP address from this Mgmtdomain's network. + + Defaults to returning an IPv4 address from ipv4_gw. Set version=6 to get an address from + the equivalent IPv6 network of ipv6_gw. 
+ """ taken_ips = self._get_taken_ips(session) def is_taken(addr): return addr in taken_ips - mgmt_net = IPv4Interface(self.ipv4_gw).network + if version not in (4, 6): + raise ValueError("version must be 4 or 6") + intf_addr = self.ipv4_gw if version == 4 else self.ipv6_gw + if intf_addr is None: + return None # can't find an addr if no subnet is defined + else: + mgmt_net = ip_interface(intf_addr).network candidates = islice(mgmt_net.hosts(), api_settings.MGMTDOMAIN_RESERVED_COUNT, None) free_ips = dropwhile(is_taken, candidates) return next(free_ips, None) @staticmethod - def _get_taken_ips(session) -> Set[IPv4Address]: + def _get_taken_ips(session) -> Set[IPAddress]: """Returns the full set of taken (used + reserved) IP addresses""" device_query = ( session.query(Device).filter(Device.management_ip is not None).options(load_only("management_ip")) diff --git a/src/cnaas_nms/db/tests/test_mgmtdomain.py b/src/cnaas_nms/db/tests/test_mgmtdomain.py index 89787de0..03f5929c 100644 --- a/src/cnaas_nms/db/tests/test_mgmtdomain.py +++ b/src/cnaas_nms/db/tests/test_mgmtdomain.py @@ -91,6 +91,17 @@ def test_find_free_mgmt_ip(self): mgmtdomain = session.query(Mgmtdomain).limit(1).one() mgmtdomain.find_free_mgmt_ip(session) + def test_find_free_mgmt_ip_v6(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + mgmtdomain.find_free_mgmt_ip(session, version=6) + + def test_find_free_mgmt_ip_should_fail_on_invalid_ip_version(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + with self.assertRaises(ValueError): + mgmtdomain.find_free_mgmt_ip(session, version=42) + def test_find_mgmtdomain_by_ip(self): with sqla_session() as session: mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain_by_ip(session, IPv4Address("10.0.6.6")) From d2f59b3fe0781125f0258f3e2ffaf968218723be Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 11 Jan 2023 13:05:08 +0100 Subject: [PATCH 094/169] Add new 
mgmtdomain_primary_ip_version api setting This config setting will be used to choose which IP address version is preferred as the primary management address of a device in a dual-stack management domain. --- docker/api/config/api.yml | 1 + docs/configuration/index.rst | 3 +++ src/cnaas_nms/app_settings.py | 11 ++++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/docker/api/config/api.yml b/docker/api/config/api.yml index 40754df9..32a87fcd 100644 --- a/docker/api/config/api.yml +++ b/docker/api/config/api.yml @@ -9,5 +9,6 @@ certpath: /tmp/devicecerts/ global_unique_vlans: True init_mgmt_timeout: 30 mgmtdomain_reserved_count: 5 +mgmtdomain_primary_ip_version: 4 commit_confirmed_mode: 1 commit_confirmed_timeout: 300 diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index ac33be8b..6221622b 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -38,6 +38,9 @@ Defines parameters for the API: each defined management domain when assigning new management IP addresses to devices. Defaults to 5 (e.g. meaning 10.0.0.1 through 10.0.0.5 would remain unassigned on a domain for 10.0.0.0/24). +- mgmtdomain_primary_ip_version: For dual stack management domains, this setting + defines whether IP version 4 or 6 is preferred when an access device's primary + management address is assigned. The only valid values are therefore 4 and 6. - commit_confirmed_mode: Integer specifying default commit confirm mode (see :ref:`commit_confirm_modes`). Defaults to 1. 
- commit_confirmed_timeout: Time to wait before rolling back an unconfirmed commit, diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 65904c9e..4d1622b2 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -2,7 +2,7 @@ from typing import Optional import yaml -from pydantic import BaseSettings, PostgresDsn +from pydantic import BaseSettings, PostgresDsn, validator class AppSettings(BaseSettings): @@ -49,10 +49,18 @@ class ApiSettings(BaseSettings): GLOBAL_UNIQUE_VLANS: bool = True INIT_MGMT_TIMEOUT: int = 30 MGMTDOMAIN_RESERVED_COUNT: int = 5 + MGMTDOMAIN_PRIMARY_IP_VERSION: int = 4 COMMIT_CONFIRMED_MODE: int = 1 COMMIT_CONFIRMED_TIMEOUT: int = 300 SETTINGS_OVERRIDE: Optional[dict] = None + @validator("MGMTDOMAIN_PRIMARY_IP_VERSION") + @classmethod + def primary_ip_version_is_valid(cls, version: int) -> int: + if version not in (4, 6): + raise ValueError("must be either 4 or 6") + return version + def construct_api_settings() -> ApiSettings: api_config = Path("/etc/cnaas-nms/api.yml") @@ -79,6 +87,7 @@ def construct_api_settings() -> ApiSettings: GLOBAL_UNIQUE_VLANS=config.get("global_unique_vlans", True), INIT_MGMT_TIMEOUT=config.get("init_mgmt_timeout", 30), MGMTDOMAIN_RESERVED_COUNT=config.get("mgmtdomain_reserved_count", 5), + MGMTDOMAIN_PRIMARY_IP_VERSION=config.get("mgmtdomain_primary_ip_version", 4), COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), SETTINGS_OVERRIDE=config.get("settings_override", None), From 7d3a0006c96826b2823a85c045d320d5d198df31 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 11 Jan 2023 13:35:27 +0100 Subject: [PATCH 095/169] Add methods for dual-stack Mgmtdomain operations --- src/cnaas_nms/db/mgmtdomain.py | 36 +++++++++++++++++++++++ src/cnaas_nms/db/tests/test_mgmtdomain.py | 23 +++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/cnaas_nms/db/mgmtdomain.py 
b/src/cnaas_nms/db/mgmtdomain.py index e6f7c893..01342049 100644 --- a/src/cnaas_nms/db/mgmtdomain.py +++ b/src/cnaas_nms/db/mgmtdomain.py @@ -61,6 +61,42 @@ def as_dict(self): pass return d + @property + def is_dual_stack(self) -> bool: + """Returns True if this mgmt domain is dual-stack""" + return bool(self.ipv4_gw) and bool(self.ipv6_gw) + + @property + def primary_gw(self) -> Optional[str]: + """Returns the primary gateway interface for this Mgmtdomain, depending on the configured preference""" + primary_version = api_settings.MGMTDOMAIN_PRIMARY_IP_VERSION + return self.ipv4_gw if primary_version == 4 else self.ipv6_gw + + @property + def secondary_gw(self) -> Optional[str]: + """Returns the secondary gateway interface for this Mgmtdomain, depending on the configured preference""" + primary_version = api_settings.MGMTDOMAIN_PRIMARY_IP_VERSION + return self.ipv6_gw if primary_version == 4 else self.ipv4_gw + + def find_free_primary_mgmt_ip(self, session) -> Optional[IPAddress]: + """Returns the first available IP address from this Mgmtdomain's primary network. + + The return value type depends on what IP version CNaaS-NMS is configured to use for + primary management addresses. + """ + primary_version = api_settings.MGMTDOMAIN_PRIMARY_IP_VERSION + return self.find_free_mgmt_ip(session, version=primary_version) + + def find_free_secondary_mgmt_ip(self, session) -> Optional[IPAddress]: + """Returns the first available IP address from this Mgmtdomain's secondary network (if + such a network is configured). + + The return value type depends on what IP version CNaaS-NMS is configured to use for + primary management addresses. + """ + secondary_version = 6 if api_settings.MGMTDOMAIN_PRIMARY_IP_VERSION == 4 else 4 + return self.find_free_mgmt_ip(session, version=secondary_version) + def find_free_mgmt_ip(self, session, version: int = 4) -> Optional[IPAddress]: """Returns the first available IP address from this Mgmtdomain's network. 
diff --git a/src/cnaas_nms/db/tests/test_mgmtdomain.py b/src/cnaas_nms/db/tests/test_mgmtdomain.py index 03f5929c..8a65c710 100644 --- a/src/cnaas_nms/db/tests/test_mgmtdomain.py +++ b/src/cnaas_nms/db/tests/test_mgmtdomain.py @@ -86,6 +86,29 @@ def test_find_mgmtdomain_oneaccess(self): mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(session, ["eosaccess"]) self.assertIsNotNone(mgmtdomain, "No mgmtdomain found for eosaccess") + def test_is_dual_stack_should_be_false_for_default_domain(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + self.assertFalse(mgmtdomain.is_dual_stack) # domain in test data is not dual stack + + def test_primary_gw_should_be_ipv4_for_default_domain(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + self.assertEqual(mgmtdomain.ipv4_gw, mgmtdomain.primary_gw) + + def test_find_free_primary_mgmt_ip_should_return_an_ipv4_address(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + value = mgmtdomain.find_free_primary_mgmt_ip(session) + self.assertTrue(value) + self.assertIsInstance(value, IPv4Address) + + def test_find_free_secondary_mgmt_ip_should_return_none(self): + with sqla_session() as session: + mgmtdomain = session.query(Mgmtdomain).limit(1).one() + value = mgmtdomain.find_free_secondary_mgmt_ip(session) + self.assertIsNone(value) # domain in test data has no secondary network + def test_find_free_mgmt_ip(self): with sqla_session() as session: mgmtdomain = session.query(Mgmtdomain).limit(1).one() From fd8c0268d7dff3bf171addc03b3564af2e4e65a6 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 11 Jan 2023 15:26:34 +0100 Subject: [PATCH 096/169] Add secondary_management_ip to Device This allows (but does not require) devices to be configured with dual-stack management (even though CNaaS-NMS will continue to use the primary management address). 
--- ...d_device_secondary_management_ip_column.py | 28 +++++++++++++++++++ src/cnaas_nms/db/device.py | 1 + 2 files changed, 29 insertions(+) create mode 100644 alembic/versions/adcce7d9baaa_add_device_secondary_management_ip_column.py diff --git a/alembic/versions/adcce7d9baaa_add_device_secondary_management_ip_column.py b/alembic/versions/adcce7d9baaa_add_device_secondary_management_ip_column.py new file mode 100644 index 00000000..ed86326e --- /dev/null +++ b/alembic/versions/adcce7d9baaa_add_device_secondary_management_ip_column.py @@ -0,0 +1,28 @@ +""""Add secondary_management_ip to Device + +Revision ID: adcce7d9baaa +Revises: 2f9faee221a7 +Create Date: 2023-01-11 15:18:12.188994 + +""" +import sqlalchemy as sa +import sqlalchemy_utils + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "adcce7d9baaa" +down_revision = "2f9faee221a7" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "device", + sa.Column("secondary_management_ip", sqlalchemy_utils.types.ip_address.IPAddressType(length=50), nullable=True), + ) + + +def downgrade(): + op.drop_column("device", "secondary_management_ip") diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 1522ee9b..a95ae3fe 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -77,6 +77,7 @@ class Device(cnaas_nms.db.base.Base): site = relationship("Site") description = Column(Unicode(255)) management_ip = Column(IPAddressType) + secondary_management_ip = Column(IPAddressType) dhcp_ip = Column(IPAddressType) infra_ip = Column(IPAddressType) oob_ip = Column(IPAddressType) From 1a69c5af5b3df02faded324cade8f3c6a4b5d2a0 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Wed, 11 Jan 2023 15:49:26 +0100 Subject: [PATCH 097/169] Assign secondary mgmt IPs in access device init This updates init_access_device_step1 to fetch, reserve and assign secondary management IPs from any dual-stack Mgmtdomain.
--- src/cnaas_nms/devicehandler/init_device.py | 39 ++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index 54731145..aa57f65d 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -496,18 +496,31 @@ def init_access_device_step1( # Select a new management IP for the device ReservedIP.clean_reservations(session, device=dev) session.commit() - mgmt_ip = mgmtdomain.find_free_mgmt_ip(session) + mgmt_ip = mgmtdomain.find_free_primary_mgmt_ip(session) if not mgmt_ip: raise Exception( - "Could not find free management IP for management domain {}/{}".format( + "Could not find free primary management IP for management domain {}/{}".format( mgmtdomain.id, mgmtdomain.description ) ) reserved_ip = ReservedIP(device=dev, ip=mgmt_ip) session.add(reserved_ip) + + secondary_mgmt_ip = None + if mgmtdomain.is_dual_stack: + secondary_mgmt_ip = mgmtdomain.find_free_secondary_mgmt_ip(session) + if not secondary_mgmt_ip: + raise Exception( + "Could not find free secondary management IP for management domain {}/{}".format( + mgmtdomain.id, mgmtdomain.description ) ) + reserved_ip = ReservedIP(device=dev, ip=secondary_mgmt_ip) + session.add(reserved_ip) + session.commit() # Populate variables for template rendering - mgmt_gw_ipif = ip_interface(mgmtdomain.ipv6_gw or mgmtdomain.ipv4_gw) + mgmt_gw_ipif = ip_interface(mgmtdomain.primary_gw) mgmt_variables = { "mgmt_ipif": str(ip_interface("{}/{}".format(mgmt_ip, mgmt_gw_ipif.network.prefixlen))), "mgmt_ip": str(mgmt_ip), @@ -515,6 +528,16 @@ def init_access_device_step1( "mgmt_vlan_id": mgmtdomain.vlan, "mgmt_gw": mgmt_gw_ipif.ip, } + if secondary_mgmt_ip: + secondary_mgmt_gw_ipif = ip_interface(mgmtdomain.secondary_gw) + mgmt_variables.update( + { + "secondary_mgmt_ipif": str( + ip_interface("{}/{}".format(secondary_mgmt_ip, secondary_mgmt_gw_ipif.network.prefixlen)) + 
), + "secondary_mgmt_ip": secondary_mgmt_ip, + } + ) device_variables = populate_device_vars(session, dev, new_hostname, DeviceType.ACCESS) device_variables = {**device_variables, **mgmt_variables} # Update device state @@ -555,17 +578,19 @@ def init_access_device_step1( linknets = dev.get_linknets(session) for linknet in linknets: session.delete(linknet) - reserved_ip = session.query(ReservedIP).filter(ReservedIP.device == dev).one_or_none() - if reserved_ip: + reserved_ips = session.query(ReservedIP).filter(ReservedIP.device == dev).all() + for reserved_ip in reserved_ips: session.delete(reserved_ip) return NornirJobResult(nrresult=nrresult) dev.management_ip = device_variables["mgmt_ip"] + if secondary_mgmt_ip: + dev.secondary_management_ip = secondary_mgmt_ip dev.state = DeviceState.INIT dev.device_type = DeviceType.ACCESS # Remove the reserved IP since it's now saved in the device database instead - reserved_ip = session.query(ReservedIP).filter(ReservedIP.device == dev).one_or_none() - if reserved_ip: + reserved_ips = session.query(ReservedIP).filter(ReservedIP.device == dev).all() + for reserved_ip in reserved_ips: session.delete(reserved_ip) # Plugin hook, allocated IP From fada0774b9ba96f127f84f732a9ecaa04195326b Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 12 Jan 2023 15:45:24 +0100 Subject: [PATCH 098/169] Add extra variables for secondary mgmt addr Prefix length and mgmt_gw should be available variables also for the secondary management address. 
--- src/cnaas_nms/devicehandler/init_device.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index aa57f65d..bcbf723b 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -536,6 +536,8 @@ def init_access_device_step1( ip_interface("{}/{}".format(secondary_mgmt_ip, secondary_mgmt_gw_ipif.network.prefixlen)) ), "secondary_mgmt_ip": secondary_mgmt_ip, + "secondary_mgmt_prefixlen": int(secondary_mgmt_gw_ipif.network.prefixlen), + "secondary_mgmt_gw": secondary_mgmt_gw_ipif.ip, } ) device_variables = populate_device_vars(session, dev, new_hostname, DeviceType.ACCESS) From 6c3ccd5a64acb6c49c54fc2fd3cc42e34e48b20c Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 12 Jan 2023 15:47:21 +0100 Subject: [PATCH 099/169] Allow both IPv4 and IPv6 mgmt ip vars --- src/cnaas_nms/devicehandler/sync_devices.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 6f1e3083..ef675d8a 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -2,7 +2,7 @@ import os import time from hashlib import sha256 -from ipaddress import IPv4Address, IPv4Interface +from ipaddress import IPv4Address, IPv4Interface, ip_interface from typing import List, Optional, Tuple import yaml @@ -156,11 +156,11 @@ def populate_device_vars( "Could not find appropriate management domain for management_ip: {}".format(dev.management_ip) ) - mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw) + mgmt_gw_ipif = ip_interface(mgmtdomain.primary_gw) access_device_variables = { "mgmt_vlan_id": mgmtdomain.vlan, "mgmt_gw": str(mgmt_gw_ipif.ip), - "mgmt_ipif": str(IPv4Interface("{}/{}".format(mgmt_ip, mgmt_gw_ipif.network.prefixlen))), + "mgmt_ipif": str(ip_interface("{}/{}".format(mgmt_ip, 
mgmt_gw_ipif.network.prefixlen))), "mgmt_ip": str(mgmt_ip), "mgmt_prefixlen": int(mgmt_gw_ipif.network.prefixlen), "interfaces": [], From ade0aabfbb4caf6edf298f2ef69254d2957c0bb5 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Thu, 12 Jan 2023 15:48:24 +0100 Subject: [PATCH 100/169] Include vars for secondary addrs on ACCESS devices This repeats a lot of what `init_access_device_step1` does. Not sure why there is an overlap, but I'm not going to change it without being sure. --- src/cnaas_nms/devicehandler/sync_devices.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index ef675d8a..5b57ce51 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -165,6 +165,20 @@ def populate_device_vars( "mgmt_prefixlen": int(mgmt_gw_ipif.network.prefixlen), "interfaces": [], } + if dev.secondary_management_ip: + secondary_mgmt_gw_ipif = ip_interface(mgmtdomain.secondary_gw) + access_device_variables.update( + { + "secondary_mgmt_ipif": str( + ip_interface( + "{}/{}".format(dev.secondary_management_ip, secondary_mgmt_gw_ipif.network.prefixlen) + ) + ), + "secondary_mgmt_ip": dev.secondary_management_ip, + "secondary_mgmt_prefixlen": int(secondary_mgmt_gw_ipif.network.prefixlen), + "secondary_mgmt_gw": secondary_mgmt_gw_ipif.ip, + } + ) # Check peer names for populating description on ACCESS_DOWNLINK ports ifname_peer_map = dev.get_linknet_localif_mapping(session) From 5e323f6a85b2865810757e5ea9f7076927e101c2 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Tue, 17 Jan 2023 16:22:42 +0100 Subject: [PATCH 101/169] Accept secondary_management_ip in device PUT This also refactors redundant IP field value validation code. The logic for all IP addr fields is the same, so no need for duplication. 
--- src/cnaas_nms/db/device.py | 41 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index a95ae3fe..4604311f 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -377,38 +377,17 @@ def validate(cls, new_entry=True, **kwargs): if "description" in kwargs: data["description"] = kwargs["description"] - if "management_ip" in kwargs: - if kwargs["management_ip"]: - try: - addr = ipaddress.ip_address(kwargs["management_ip"]) - except Exception: - errors.append("Invalid management_ip received. Must be a valid IP address.") - else: - data["management_ip"] = addr - else: - data["management_ip"] = None - - if "infra_ip" in kwargs: - if kwargs["infra_ip"]: - try: - addr = ipaddress.ip_address(kwargs["infra_ip"]) - except Exception: - errors.append("Invalid infra_ip received. Must be valid IP address.") - else: - data["infra_ip"] = addr - else: - data["infra_ip"] = None - - if "dhcp_ip" in kwargs: - if kwargs["dhcp_ip"]: - try: - addr = ipaddress.ip_address(kwargs["dhcp_ip"]) - except Exception: - errors.append("Invalid dhcp_ip received. Must be valid IP address.") + for ip_field in ("management_ip", "secondary_management_ip", "infra_ip", "dhcp_ip"): + if ip_field in kwargs: + if kwargs[ip_field]: + try: + addr = ipaddress.ip_address(kwargs[ip_field]) + except Exception: + errors.append("Invalid {} received. 
Must be a valid IP address.".format(ip_field)) + else: + data[ip_field] = addr else: - data["dhcp_ip"] = addr - else: - data["dhcp_ip"] = None + data[ip_field] = None if "serial" in kwargs: try: From 6af85352acb5718ec9718c36b9431b62906f2eea Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 6 Apr 2023 09:48:31 +0200 Subject: [PATCH 102/169] update redis for CVE-2023-28859 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0522b06a..291a8eca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ psycopg2-binary==2.9.3 pytest==7.1.3 pytest-cov==3.0.0 pytest-docker-compose==3.2.1 -redis==4.3.4 +redis==4.3.6 redis-lru==0.1.0 Sphinx==5.1.1 SQLAlchemy==1.4.41 From 8a7ef84a73e10ef96c1f5a4dff3bf963b44f17e5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 6 Apr 2023 13:27:39 +0200 Subject: [PATCH 103/169] script to manually generate ESI IDs etc for custom ports --- src/cnaas_nms/tools/generate_esi.py | 171 ++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100755 src/cnaas_nms/tools/generate_esi.py diff --git a/src/cnaas_nms/tools/generate_esi.py b/src/cnaas_nms/tools/generate_esi.py new file mode 100755 index 00000000..5f6eeeaa --- /dev/null +++ b/src/cnaas_nms/tools/generate_esi.py @@ -0,0 +1,171 @@ +#!/bin/env python3 + +import argparse +import os +import sys + +from jinja_helpers import get_environment_secrets + +try: + import jinja2 + import requests + import yaml + from jinja2.meta import find_undeclared_variables +except ModuleNotFoundError as e: + print("Please install python modules requests, jinja2 and (ruamel.)yaml: {}".format(e)) + print("Optionally install netutils for more filters") + sys.exit(3) + +if "CNAASURL" not in os.environ or "JWT_AUTH_TOKEN" not in os.environ: + print("Please export environment variables CNAASURL and JWT_AUTH_TOKEN") + sys.exit(4) + +api_url = os.environ["CNAASURL"] +headers = {"Authorization": "Bearer 
" + os.environ["JWT_AUTH_TOKEN"]} +verify_tls = True + + +def get_entrypoint(platform, device_type): + mapfile = os.path.join(platform, "mapping.yml") + if not os.path.isfile(mapfile): + raise Exception("File {} not found".format(mapfile)) + with open(mapfile, "r") as f: + mapping = yaml.safe_load(f) + template_file = mapping[device_type]["entrypoint"] + return template_file + + +def get_device_details(hostname): + r = requests.get(f"{api_url}/api/v1.0/device/{hostname}", verify=verify_tls, headers=headers) + if r.status_code != 200: + raise Exception("Could not query device API") + device_data = r.json()["data"]["devices"][0] + + r = requests.get(f"{api_url}/api/v1.0/device/{hostname}/generate_config", verify=verify_tls, headers=headers) + if r.status_code != 200: + raise Exception("Could not query generate_config API") + config_data = r.json()["data"]["config"] + + return ( + device_data["device_type"], + device_data["platform"], + config_data["available_variables"], + config_data["generated_config"], + ) + + +def load_jinja_filters(): + ret = {} + try: + import jinja_filters + + ret = jinja_filters.FILTERS + except ModuleNotFoundError as e: + print("jinja_filters.py could not be loaded from PYTHONPATH, proceeding without filters: " f"{e}") + try: + from netutils.utils import jinja2_convenience_function + + ret = {**ret, **jinja2_convenience_function()} + except ModuleNotFoundError as e: + print("netutils could not be loaded from PYTHONPATH, proceeding without filters: " f"{e}") + return ret + + +def render_template(platform, device_type, variables): + # Jinja env should match nornir_helper.cnaas_ninja_env + jinjaenv = jinja2.Environment( + loader=jinja2.FileSystemLoader(platform), + undefined=jinja2.DebugUndefined, + trim_blocks=True, + lstrip_blocks=True, + keep_trailing_newline=True, + ) + jfilters = load_jinja_filters() + jinjaenv.filters.update(jfilters) + template_vars = {**variables, **get_environment_secrets()} + template = 
jinjaenv.get_template(get_entrypoint(platform, device_type)) + rendered = template.render(**template_vars) + # Find undefined variables, if + ast = jinjaenv.parse(rendered) + undefined_vars = find_undeclared_variables(ast=ast) + if undefined_vars: + for var in undefined_vars: + if var.startswith("TEMPLATE_SECRET_"): + template_vars[var] = "dummyvalue" + print('Undefined secret variable, set to "dummyvalue": {}'.format(var)) + else: + print("Undefined variable: {}".format(var)) + rendered = template.render(**template_vars) + return rendered + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("hostname") + parser.add_argument("interface") + parser.add_argument("-k", "--skip-verify", help="skip TLS cert verification", action="store_true") + parser.add_argument( + "-s", "--search-lines", help="how many lines to search after interface name (default 20)", type=int + ) + parser.add_argument("-p", "--print-lines", help="how many lines to print after match (default 5)", type=int) + parser.add_argument("-m", "--match", help="""text to match for after interface (default "evpn")""") + args = parser.parse_args() + + hostname = args.hostname + interface = args.interface + if args.skip_verify: + global verify_tls + verify_tls = False + try: + device_type, platform, variables, old_config = get_device_details(hostname) + except Exception as e: + print(e) + sys.exit(2) + if args.search_lines: + search_lines = args.search_lines + else: + search_lines = 20 + if args.print_lines: + print_lines = args.print_lines + else: + print_lines = 5 + if args.match: + match_str = args.match + else: + match_str = "evpn" + variables["host"] = hostname + new_variables = variables + new_interface = {} + for intf in variables["interfaces"]: + if intf["name"] == interface: + new_interface = { + "name": interface, + "ifclass": "downlink", + "redundant_link": True, + "indexnum": intf["indexnum"], + "data": {}, + } + new_variables["interfaces"] = [new_interface] + new_config = 
render_template(platform, device_type, variables) + save_after = 0 + saved_lines = [] + for line in new_config.splitlines(): + if save_after > 0: + saved_lines.append(line) + save_after -= 1 + if interface in line: + saved_lines.append(line) + save_after = search_lines + + print_after = 0 + for line in saved_lines: + if print_after > 0: + print(line) + print_after -= 1 + if match_str in line: + print(line) + print_after = print_lines + + +if __name__ == "__main__": + main() From 622125e5abd3d9d40b0cadaabb0047f188e14cbb Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 26 Apr 2023 14:59:00 +0200 Subject: [PATCH 104/169] Fix bug on access->access ZTP where initcheck would fail when configtypes is downlink but data is empty/null --- src/cnaas_nms/devicehandler/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/get.py b/src/cnaas_nms/devicehandler/get.py index 237f5d1a..92126636 100644 --- a/src/cnaas_nms/devicehandler/get.py +++ b/src/cnaas_nms/devicehandler/get.py @@ -307,7 +307,7 @@ def verify_peer_iftype( raise InterfaceError( "Peer device interface not configured as ACCESS_DOWNLINK: {} {}".format(remote_dev.hostname, remote_if) ) - if "redundant_link" in remote_intf.data and not remote_intf.data["redundant_link"]: + if remote_intf.data and "redundant_link" in remote_intf.data and not remote_intf.data["redundant_link"]: return False return True From 668e0a1ee2f336161f0c156893beba9f2aa8c58b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 8 May 2023 12:59:51 +0200 Subject: [PATCH 105/169] Fix error "Error in repository: HEAD is a detached" when refreshing settings repo This occurs when refreshing repo from a branch, then refreshing again but with settings again that fails syntax check, and then refreshing one or more times again after that --- src/cnaas_nms/db/git.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index 
aee5e3e9..86793652 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -161,6 +161,9 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES) -> str: local_repo = Repo(local_repo_path) # If repo url has changed current_repo_url = next(local_repo.remotes.origin.urls) + # Reset head if it's detached + if local_repo.head.is_detached: + reset_repo(local_repo, remote_repo_path) if current_repo_url != url or (branch and local_repo.head.ref.name != branch): logger.info( "Repo URL for {} has changed from {}#{} to {}#{}".format( @@ -173,9 +176,6 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES) -> str: ) shutil.rmtree(local_repo_path) raise NoSuchPathError - # Reset head if it's detached - if local_repo.head.is_detached: - reset_repo(local_repo, remote_repo_path) prev_commit = local_repo.commit().hexsha diff = local_repo.remotes.origin.pull() for item in diff: From d4c637ef981e34534a5c978135ffd63f30cbb246 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 8 May 2023 13:29:18 +0200 Subject: [PATCH 106/169] handle more error cases like when changing to/from other branch with syntax errors --- src/cnaas_nms/db/git.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index 86793652..e8b99734 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -162,14 +162,23 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES) -> str: # If repo url has changed current_repo_url = next(local_repo.remotes.origin.urls) # Reset head if it's detached + reset_head_failed = False if local_repo.head.is_detached: - reset_repo(local_repo, remote_repo_path) - if current_repo_url != url or (branch and local_repo.head.ref.name != branch): + try: + reset_repo(local_repo, remote_repo_path) + except Exception: + logger.exception("Git repo had detached head and repo reset failed: {}".format(remote_repo_path)) + reset_head_failed = True + if 
reset_head_failed or current_repo_url != url or (branch and local_repo.head.ref.name != branch): + if reset_head_failed: + current_branch = "detached" # unable to get head.ref.name if head was detached + else: + current_branch = local_repo.head.ref.name logger.info( - "Repo URL for {} has changed from {}#{} to {}#{}".format( + "Repo URL for {} has changed from {}#{} to {}#{}, hard reset repo clone".format( repo_type.name, current_repo_url, - local_repo.head.ref.name, + current_branch, url, branch, ) From 94f16099e6949a9703256fc2fcd6fca23f7aef87 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 8 May 2023 15:06:17 +0200 Subject: [PATCH 107/169] Add missing docs for job API abort action --- docs/apiref/jobs.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/apiref/jobs.rst b/docs/apiref/jobs.rst index 7e5a9b2c..b111e876 100644 --- a/docs/apiref/jobs.rst +++ b/docs/apiref/jobs.rst @@ -107,6 +107,18 @@ It's also possible to query a single job by job ID: curl http://hostname/api/v1.0/job/5 +Abort scheduled job +------------------- + +To abort a scheduled job send a request like this: + +:: + + curl http://hostname/api/v1.0/job/5 -X PUT -d '{"action": "ABORT"}' -H "Content-Type: application/json" + +You can also send the request to a running job, but currently only firmware +upgrade will listen for aborts while running and devices that already passed +the check will continue with their upgrades. 
Locks ----- From d40d91192e9f0bf7f87ce8c1900cc0489b22d86b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 8 May 2023 15:08:34 +0200 Subject: [PATCH 108/169] try to get github dependabot to not hang on this file --- requirements-toplevel-cmd.sh | 2 ++ requirements-toplevel-cmd.txt | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 requirements-toplevel-cmd.sh delete mode 100644 requirements-toplevel-cmd.txt diff --git a/requirements-toplevel-cmd.sh b/requirements-toplevel-cmd.sh new file mode 100644 index 00000000..f2a9e4b0 --- /dev/null +++ b/requirements-toplevel-cmd.sh @@ -0,0 +1,2 @@ +# python3 -m pip freeze | egrep "^(SQLAlchemy|nornir|Flask-JWT-Extended|flask-restplus|APScheduler|psycopg2|mypy|sqlalchemy-stubs|nose|GitPython|alembic|Sphinx|coverage|pluggy|redis|Flask-SocketIO|gevent|Flask-Cors|redis-lru)" > requirements.txt +# sqlalchemy-stubs is required for mypy to handle typing definitions from sqlalchemy? diff --git a/requirements-toplevel-cmd.txt b/requirements-toplevel-cmd.txt deleted file mode 100644 index 507cd7b1..00000000 --- a/requirements-toplevel-cmd.txt +++ /dev/null @@ -1,2 +0,0 @@ -python3 -m pip freeze | egrep "^(SQLAlchemy|nornir|Flask-JWT-Extended|flask-restplus|APScheduler|psycopg2|mypy|sqlalchemy-stubs|nose|GitPython|alembic|Sphinx|coverage|pluggy|redis|Flask-SocketIO|gevent|Flask-Cors|redis-lru)" > requirements.txt -# sqlalchemy-stubs is required for mypy to handle typing definitions from sqlalchemy? 
From 86b60582689d5d3798c330919bec29f64e1f8b0c Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 9 May 2023 15:25:20 +0200 Subject: [PATCH 109/169] on PR wolkflow action use pr number to reference branch instead of branch name, since branch name only works for PRs from same repo and not from forks etc --- .github/workflows/run-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index d8540b93..6d795f92 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -44,7 +44,7 @@ jobs: - name: Get branch name (pull request) if: github.event_name == 'pull_request' shell: bash - run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV + run: echo "BRANCH_NAME=pr/$(echo ${GITHUB_REF#refs/pull/} | cut -d '/' -f1)" >> $GITHUB_ENV - name: Get repo URL shell: bash From e008fd2b62fc2351949656b0b196e7fb95d42f33 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 May 2023 13:29:44 +0000 Subject: [PATCH 110/169] Bump flask from 2.1.2 to 2.2.5 Bumps [flask](https://github.com/pallets/flask) from 2.1.2 to 2.2.5. - [Release notes](https://github.com/pallets/flask/releases) - [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/flask/compare/2.1.2...2.2.5) --- updated-dependencies: - dependency-name: flask dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 291a8eca..feb9677f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ coverage==6.4.4 Flask-Cors==3.0.10 Flask-JWT-Extended==4.4.4 flask-restx==1.0.5 -flask==2.1.2 +flask==2.2.5 Flask-SocketIO==5.3.1 gevent==21.12.0 GitPython==3.1.30 From df6a0af39bad91e77e4cb00ac10756cb7bd0e8cf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 May 2023 13:35:20 +0000 Subject: [PATCH 111/169] Bump redis from 4.3.6 to 4.4.4 Bumps [redis](https://github.com/redis/redis-py) from 4.3.6 to 4.4.4. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.3.6...v4.4.4) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index feb9677f..68824699 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ psycopg2-binary==2.9.3 pytest==7.1.3 pytest-cov==3.0.0 pytest-docker-compose==3.2.1 -redis==4.3.6 +redis==4.4.4 redis-lru==0.1.0 Sphinx==5.1.1 SQLAlchemy==1.4.41 From 53666e848842d83e0b3d7befe8e833fe083db951 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 15 May 2023 14:29:59 +0200 Subject: [PATCH 112/169] When doing init of a new device, negihbor/uplink device sync statuses are checked, but if they have local changes saved the devices were not marked as unsynced in database --- src/cnaas_nms/devicehandler/init_device.py | 2 +- src/cnaas_nms/devicehandler/sync_devices.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index bcbf723b..857bee99 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -657,7 +657,7 @@ def check_neighbor_sync(session, hostnames: List[str]): raise DeviceStateError("Neighbor device {} not in state MANAGED".format(hostname)) if not dev.synchronized: raise DeviceSyncError("Neighbor device {} not synchronized".format(hostname)) - confcheck_devices(hostnames) + confcheck_devices(session, hostnames) @job_wrapper diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 5b57ce51..7d2c0c46 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -664,7 +664,7 @@ def update_config_hash(task): logger.debug("Config hash for {} updated to {}".format(task.host.name, new_config_hash)) -def confcheck_devices(hostnames: List[str], job_id=None): +def confcheck_devices(session, hostnames: List[str], job_id=None): nr = cnaas_init() 
nr_filtered, dev_count, skipped_hostnames = inventory_selector(nr, hostname=hostnames) @@ -674,6 +674,9 @@ def confcheck_devices(hostnames: List[str], job_id=None): raise e else: if nrresult.failed: + for hostname in nrresult.failed_hosts.keys(): + dev: Device = session.query(Device).filter(Device.hostname == hostname).one() + dev.synchronized = False raise Exception("Configuration hash check failed for {}".format(" ".join(nrresult.failed_hosts.keys()))) From 38f1affac4a0d54cbbe07d52d700b7248308634b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 13 Jun 2023 14:49:15 +0200 Subject: [PATCH 113/169] allow settings in base_system to have groups auto filtering --- src/cnaas_nms/db/settings.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/db/settings.py b/src/cnaas_nms/db/settings.py index 4dd140c8..7793556e 100644 --- a/src/cnaas_nms/db/settings.py +++ b/src/cnaas_nms/db/settings.py @@ -556,9 +556,17 @@ def get_settings( settings_origin[k] = "default" # 2. Get settings repo global settings - settings, settings_origin = read_settings( - local_repo_path, ["global", "base_system.yml"], "global->base_system.yml", settings, settings_origin - ) + if hostname: + # Some settings parsing require knowledge of group memberships + groups = get_groups(hostname) + settings, settings_origin = read_settings( + local_repo_path, ["global", "base_system.yml"], "global->base_system.yml", settings, settings_origin, groups + ) + else: + settings, settings_origin = read_settings( + local_repo_path, ["global", "base_system.yml"], "global->base_system.yml", settings, settings_origin + ) + # 3. 
Get settings from special fabric classification (dist + core) if device_type and (device_type == DeviceType.DIST or device_type == DeviceType.CORE): settings, settings_origin = read_settings( @@ -576,8 +584,6 @@ def get_settings( settings_origin, ) if hostname: - # Some settings parsing require knowledge of group memberships - groups = get_groups(hostname) settings, settings_origin = read_settings( local_repo_path, ["global", "routing.yml"], "global->routing.yml", settings, settings_origin, groups ) From 710ab1e8e9c16a22cd211dad3faca460ce6144bc Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 13 Jun 2023 15:09:00 +0200 Subject: [PATCH 114/169] Add support for users settings, with some vendor specific fields for password hashes and permissions. Add support for some campus settings dot1x_multi_host and poe_reboot_maintain --- docs/reporef/index.rst | 58 ++++++++++++++++++++++++----- src/cnaas_nms/db/settings_fields.py | 16 ++++++++ 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/docs/reporef/index.rst b/docs/reporef/index.rst index 1ef9e342..3ce476a0 100644 --- a/docs/reporef/index.rst +++ b/docs/reporef/index.rst @@ -367,14 +367,55 @@ base_system.yml: Contains base system settings like: -- ntp_servers -- snmp_servers -- dns_servers -- syslog_servers -- flow_collectors -- dhcp_relays -- internal_vlans +- ntp_servers: List of + + * host: IP address or hostname of NTP server + +- snmp_servers: List of + + * host: IP address or hostname of SNMP trap target + * port: Port number. Optional + +- dns_servers: List of + + * host: IP address to DNS server + +- syslog_servers: List of + + * host: IP address or hostname to syslog server + * port: Port number. Optional + +- flow_collectors: List of + + * host: IP address or hostname to flow collector + * port: Port number. Optional + +- dhcp_relays: List of + + * host: IP address or hostname to DHCP relay + +- users: List of + + * username: Username string + * ssh_key: SSH public key string. 
Optional + * uid: UserID number. Optional + * password_hash_arista: Hashed password string for Arista devices. Optional + * password_hash_cisco: Hashed password string for Cisco devices. Optional + * password_hash_juniper: Hashed password string for Juniper devices. Optional + * permission_arista: String to specify user access level for Arista, ex "privilege 15 role network-admin". Optional + * permission_cisco: String to specify user access level for Cisco, ex "privilege 15". Optional + * permission_juniper: String to specify user access level for Juniper, ex "superuser". Optional + * groups: A list of device groups that this user should be provisioned on + +- internal_vlans: + + * vlan_low: Low end of internal VLAN range + * vlan_high: High end of internal VLAN range + * allocation_order: Allocation order, default "ascending" + - dot1x_fail_vlan: Numeric ID of authentication fail VLAN +- dot1x_multi_host: Allow multiple clients behind a dot1x authenticated port. Default false +- poe_reboot_maintain: Maintain POE supply during reboot of the switch. Default false - organization_name: Free format string describing organization name - domain_name: DNS domain (suffix) @@ -406,9 +447,6 @@ Example of base_system.yml: dot1x_fail_vlan: 13 -syslog_servers and radius_severs can optionally have the key "port" specified -to indicate a non-defalut layer4 (TCP/UDP) port number. - internal_vlans can optionally be specified if you want to manually define the range of internal VLANs on L3 switches. 
You can also specify the option "allocation_order" under internal_vlans which is a custom string that defaults diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index cb1a9d34..879fc53b 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -296,6 +296,19 @@ class f_underlay(BaseModel): bgp_asn: Optional[as_num_type] = as_num_schema +class f_user(BaseModel): + username: str + ssh_key: Optional[str] = None + uid: Optional[int] = None + password_hash_arista: Optional[str] = None + password_hash_cisco: Optional[str] = None + password_hash_juniper: Optional[str] = None + permission_arista: Optional[str] = None + permission_cisco: Optional[str] = None + permission_juniper: Optional[str] = None + groups: List[str] = [] + + class f_root(BaseModel): ntp_servers: List[f_ntp_server] = [] radius_servers: List[f_radius_server] = [] @@ -318,6 +331,9 @@ class f_root(BaseModel): cli_append_str: str = "" organization_name: str = "" domain_name: Optional[str] = domain_name_schema + users: List[f_user] = [] + dot1x_multi_host: bool = False + poe_reboot_maintain: bool = False class f_group_item(BaseModel): From 4d10c7d0a8827a7df0eaf26ebd901a30029b491f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 13 Sep 2022 14:00:23 +0200 Subject: [PATCH 115/169] routingpolicy and prefixset models --- src/cnaas_nms/db/settings_fields.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 879fc53b..51cb0eab 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -31,6 +31,7 @@ ipv6_schema = Field(..., regex=f"^{IPV6_REGEX}$", description="IPv6 address") IPV6_IF_REGEX = f"{IPV6_REGEX}" + r"\/[0-9]{1,3}" ipv6_if_schema = Field(None, regex=f"^{IPV6_IF_REGEX}$", description="IPv6 address in CIDR/prefix notation (::/0)") +ipv4_or_ipv6_if_schema = Field(None, 
regex=f"({IPV4_IF_REGEX}|{IPV6_IF_REGEX})", description="IPv4 or IPv6 prefix") # VLAN name is alphanumeric max 32 chars on Cisco # should not start with number according to some Juniper doc @@ -57,6 +58,10 @@ tcpudp_port_schema = Field(None, ge=0, lt=65536, description="TCP or UDP port number, 0-65535") ebgp_multihop_schema = Field(None, ge=1, le=255, description="Numeric IP TTL, 1-255") maximum_routes_schema = Field(None, ge=0, le=4294967294, description="Maximum number of routes to receive from peer") +accept_or_reject_schema = Field(..., regex=r"^(accept|reject)$", description="Value has to be 'accept' or 'reject'") +prefix_size_or_range_schema = Field( + None, regex=r"^[0-9]{1,3}([-][0-9]{1,3})?$", description="Prefix size or range 0-128" +) GROUP_NAME = r"^([a-zA-Z0-9_-]{1,63}\.?)+$" group_name = Field(..., regex=GROUP_NAME, max_length=253) @@ -309,6 +314,30 @@ class f_user(BaseModel): groups: List[str] = [] +class f_prefixset_item(BaseModel): + prefix: str = ipv4_or_ipv6_if_schema + masklength_range: Optional[str] = prefix_size_or_range_schema + + +class f_prefixset(BaseModel): + mode: str = "ipv4" + prefixes: List[f_prefixset_item] + + +class f_rpolicy_condition(BaseModel): + match_type: str + match_target: str + + +class f_rpolicy_statement(BaseModel): + action: str = accept_or_reject_schema + conditions: List[f_rpolicy_condition] + + +class f_routingpolicy(BaseModel): + statements: List[f_rpolicy_statement] + + class f_root(BaseModel): ntp_servers: List[f_ntp_server] = [] radius_servers: List[f_radius_server] = [] @@ -334,6 +363,8 @@ class f_root(BaseModel): users: List[f_user] = [] dot1x_multi_host: bool = False poe_reboot_maintain: bool = False + prefix_sets: Dict[str, f_prefixset] = {} + routing_policies: Dict[str, f_routingpolicy] = {} class f_group_item(BaseModel): From 6976a5d92f3a7e6f8e907e634f9a146c2f67b2f1 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 29 Jun 2023 10:10:08 +0200 Subject: [PATCH 116/169] docs for prefix_sets and 
routing_policies --- docs/reporef/index.rst | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/docs/reporef/index.rst b/docs/reporef/index.rst index 3ce476a0..3ad647be 100644 --- a/docs/reporef/index.rst +++ b/docs/reporef/index.rst @@ -275,6 +275,25 @@ Can contain the following dictionaries with specified keys: * peer_ipv6: IPv6 address of peer * other options are the same as neighbor_v4 +- prefix_sets: Dictionary of {, }: + + * mode: String, either "ipv4", "ipv6" or "mixed" + * prefixes: list of + + * prefix: String for ipv4 or ipv6 prefix, ex: 10.0.0.0/8 + * masklength_range: Optional string defining range of prefixes to match, ex: 24-32 or 32-32 + +- routing_policies: Dictionary of {, }: + + * statements: List of: + + * action: Action to perform on match, either "accept" or "reject" + * conditions: List of: + + * match_type: String, ex "ipv4 prefix-set" + * match_target: String, referring to prefix-set for example: "default-route" + + routing.yml examples: :: @@ -299,12 +318,47 @@ routing.yml examples: - destination: 172.12.0.0/24 nexthop: 10.0.254.1 name: cnaas-mgmt + prefix_sets: + "default": + mode: "ipv4" + prefixes: + - prefix: 0.0.0.0/0 + masklength_range: 0 + "24_or_longer": + mode: "ipv4" + prefixes: + - prefix: 0.0.0.0/0 + masklength_range: 24-32 + "v6default": + mode: "ipv6" + prefixes: + - prefix: ::/0 + "all-ipv6": + mode: "ipv6" + prefixes: + - prefix: ::/0 + masklength_range: 0-128 + routing_policies: + "allow_default": + statements: + - action: "accept" + conditions: + - match_type: "ipv4 prefix-set" + match_target: "default" + "allow_all_v6": + statements: + - action: "accept" + conditions: + - match_type: "ipv6 prefix-set" + match_target: "all-ipv6" vxlans.yml: Contains a dictinary called "vxlans", which in turn has one dictinoary per vxlan, vxlan name is the dictionary key and dictionaly values are: +- vxlans: Dictionary of {, }: + * vni: VXLAN ID, 1-16777215 * vrf: VRF name. 
Optional unless ipv4_gw is also specified. * vlan_id: VLAN ID, 1-4095 From 9b04fe8e9ebd6bd0e65e23cbcc74af6eea0e6578 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Aug 2023 20:25:34 +0000 Subject: [PATCH 117/169] Bump gitpython from 3.1.30 to 3.1.32 Bumps [gitpython](https://github.com/gitpython-developers/GitPython) from 3.1.30 to 3.1.32. - [Release notes](https://github.com/gitpython-developers/GitPython/releases) - [Changelog](https://github.com/gitpython-developers/GitPython/blob/main/CHANGES) - [Commits](https://github.com/gitpython-developers/GitPython/compare/3.1.30...3.1.32) --- updated-dependencies: - dependency-name: gitpython dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 68824699..9e21f63f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ flask-restx==1.0.5 flask==2.2.5 Flask-SocketIO==5.3.1 gevent==21.12.0 -GitPython==3.1.30 +GitPython==3.1.32 mypy==0.971 mypy-extensions==0.4.3 markupsafe==2.1.1 From 2f0056841d0ab2a6528b6179c0d48d339891cefc Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 29 Jun 2023 13:52:55 +0200 Subject: [PATCH 118/169] add dataclasses and methods for writing and reading sync history events from redis --- src/cnaas_nms/devicehandler/sync_history.py | 58 +++++++++++++++++++ .../devicehandler/tests/test_sync_history.py | 9 +++ 2 files changed, 67 insertions(+) create mode 100644 src/cnaas_nms/devicehandler/sync_history.py create mode 100644 src/cnaas_nms/devicehandler/tests/test_sync_history.py diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py new file mode 100644 index 00000000..85856cd7 --- /dev/null +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -0,0 +1,58 @@ +import json +import time +from dataclasses import asdict, dataclass 
+from typing import Dict, List, Optional + +from cnaas_nms.db.session import redis_session + +REDIS_SYNC_HISTORY_KEYNAME = "sync_history" + + +@dataclass(frozen=True) +class SyncEvent: + cause: str + time: float + by: str + job_id: Optional[int] + + +@dataclass +class SyncHistory: + """Contains a history field which is a dict of hostname: List[SyncEvent]""" + + history: Dict[str, List[SyncEvent]] + + def redis_dump(self) -> Dict[str, str]: + # redis doesn't support nested datatypes, so save inner list as string of json instead + return {k: json.dumps([asdict(e) for e in v]) for (k, v) in self.history.items()} + + def redis_load(self, redis_dict: Dict[str, str]): + self.history = {k: [SyncEvent(**e) for e in json.loads(v)] for (k, v) in redis_dict.items()} + + +def add_sync_event(hostname: str, cause: str, by: str, job_id: Optional[int] = None): + sync_event = SyncEvent(cause, time.time(), by, job_id) + with redis_session() as redis: + if not redis.exists(REDIS_SYNC_HISTORY_KEYNAME): + new_history = SyncHistory(history={hostname: [sync_event]}) + redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=new_history.redis_dump()) + else: + sync_history = SyncHistory(history={}) + sync_history.redis_load(redis.hgetall(REDIS_SYNC_HISTORY_KEYNAME)) + if hostname in sync_history.history.keys(): + sync_history.history[hostname].append(sync_event) + else: + sync_history.history[hostname] = [sync_event] + redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=sync_history.redis_dump()) + + +def get_sync_events(hostnames: List[str]) -> SyncHistory: + ret = SyncHistory(history={}) + sync_history = SyncHistory(history={}) + with redis_session() as redis: + sync_history.redis_load(redis.hgetall(REDIS_SYNC_HISTORY_KEYNAME)) + for hostname, events in sync_history.history.items(): + if hostname in hostnames: + ret.history[hostname] = events + + return ret diff --git a/src/cnaas_nms/devicehandler/tests/test_sync_history.py b/src/cnaas_nms/devicehandler/tests/test_sync_history.py new file mode 100644 
index 00000000..251cae6b --- /dev/null +++ b/src/cnaas_nms/devicehandler/tests/test_sync_history.py @@ -0,0 +1,9 @@ +from cnaas_nms.devicehandler.sync_history import add_sync_event, get_sync_events + + +def test_set_sync_history(postgresql, redis): + add_sync_event("eosdist1", "refresh_settings", "indy@sunet.se", 123) + + +def test_get_sync_history(postgresql, redis): + print(get_sync_events("eosdist1")) From 8c661a0542d4b1e37fe74a3d6a661f90cfe82ea3 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 29 Jun 2023 16:09:02 +0200 Subject: [PATCH 119/169] use redis hget/hset on specific keys instead of entire hash to avoid thread issues hopefully --- src/cnaas_nms/devicehandler/sync_history.py | 23 ++++++++++++++----- .../devicehandler/tests/test_sync_history.py | 15 ++++++++---- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index 85856cd7..e06170ff 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -4,8 +4,10 @@ from typing import Dict, List, Optional from cnaas_nms.db.session import redis_session +from cnaas_nms.tools.log import get_logger REDIS_SYNC_HISTORY_KEYNAME = "sync_history" +logger = get_logger() @dataclass(frozen=True) @@ -36,14 +38,18 @@ def add_sync_event(hostname: str, cause: str, by: str, job_id: Optional[int] = N if not redis.exists(REDIS_SYNC_HISTORY_KEYNAME): new_history = SyncHistory(history={hostname: [sync_event]}) redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=new_history.redis_dump()) + logger.debug("New sync_history hash created in redis") else: - sync_history = SyncHistory(history={}) - sync_history.redis_load(redis.hgetall(REDIS_SYNC_HISTORY_KEYNAME)) - if hostname in sync_history.history.keys(): - sync_history.history[hostname].append(sync_event) + current_sync_event_data = redis.hget(REDIS_SYNC_HISTORY_KEYNAME, hostname) + current_sync_events: List[SyncEvent] = [] + if 
current_sync_event_data: + current_sync_events = [SyncEvent(**e) for e in json.loads(current_sync_event_data)] + current_sync_events.append(sync_event) else: - sync_history.history[hostname] = [sync_event] - redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=sync_history.redis_dump()) + current_sync_events = [sync_event] + redis.hset( + REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json.dumps([asdict(e) for e in current_sync_events]) + ) def get_sync_events(hostnames: List[str]) -> SyncHistory: @@ -56,3 +62,8 @@ def get_sync_events(hostnames: List[str]) -> SyncHistory: ret.history[hostname] = events return ret + + +def remove_sync_events(hostname: str): + with redis_session() as redis: + redis.hdel(REDIS_SYNC_HISTORY_KEYNAME, hostname) diff --git a/src/cnaas_nms/devicehandler/tests/test_sync_history.py b/src/cnaas_nms/devicehandler/tests/test_sync_history.py index 251cae6b..e4bec4f5 100644 --- a/src/cnaas_nms/devicehandler/tests/test_sync_history.py +++ b/src/cnaas_nms/devicehandler/tests/test_sync_history.py @@ -1,9 +1,16 @@ -from cnaas_nms.devicehandler.sync_history import add_sync_event, get_sync_events +from cnaas_nms.devicehandler.sync_history import add_sync_event, get_sync_events, remove_sync_events -def test_set_sync_history(postgresql, redis): - add_sync_event("eosdist1", "refresh_settings", "indy@sunet.se", 123) +def test_set_sync_history(redis): + add_sync_event("eosdist1", "refresh_settings", "unittest", 123) + add_sync_event("eosdist1", "refresh_settings", "unittest", 124) + add_sync_event("eosdist1", "ztp", "unittest") -def test_get_sync_history(postgresql, redis): +def test_get_sync_history(redis): + print(get_sync_events("eosdist1")) + + +def test_remove_sync_history(redis): + remove_sync_events("eosdist1") print(get_sync_events("eosdist1")) From f2a0c222aee4c17d6dafc6d1e1f9cf67b36fe359 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 30 Jun 2023 11:27:37 +0200 Subject: [PATCH 120/169] make sure add_sync_event doesn't cause exceptions for 
anyone calling it, it's not critical for operations --- src/cnaas_nms/devicehandler/sync_history.py | 43 +++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index e06170ff..00a1e79c 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -3,6 +3,8 @@ from dataclasses import asdict, dataclass from typing import Dict, List, Optional +from redis.exceptions import RedisError + from cnaas_nms.db.session import redis_session from cnaas_nms.tools.log import get_logger @@ -32,24 +34,31 @@ def redis_load(self, redis_dict: Dict[str, str]): self.history = {k: [SyncEvent(**e) for e in json.loads(v)] for (k, v) in redis_dict.items()} -def add_sync_event(hostname: str, cause: str, by: str, job_id: Optional[int] = None): - sync_event = SyncEvent(cause, time.time(), by, job_id) - with redis_session() as redis: - if not redis.exists(REDIS_SYNC_HISTORY_KEYNAME): - new_history = SyncHistory(history={hostname: [sync_event]}) - redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=new_history.redis_dump()) - logger.debug("New sync_history hash created in redis") - else: - current_sync_event_data = redis.hget(REDIS_SYNC_HISTORY_KEYNAME, hostname) - current_sync_events: List[SyncEvent] = [] - if current_sync_event_data: - current_sync_events = [SyncEvent(**e) for e in json.loads(current_sync_event_data)] - current_sync_events.append(sync_event) +def add_sync_event(hostname: str, cause: str, by: Optional[str] = None, job_id: Optional[int] = None): + try: + if not by: + by = "unknown" + sync_event = SyncEvent(cause, time.time(), by, job_id) + with redis_session() as redis: + if not redis.exists(REDIS_SYNC_HISTORY_KEYNAME): + new_history = SyncHistory(history={hostname: [sync_event]}) + redis.hset(REDIS_SYNC_HISTORY_KEYNAME, mapping=new_history.redis_dump()) + logger.debug("New sync_history hash created in redis") else: - 
current_sync_events = [sync_event] - redis.hset( - REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json.dumps([asdict(e) for e in current_sync_events]) - ) + current_sync_event_data = redis.hget(REDIS_SYNC_HISTORY_KEYNAME, hostname) + current_sync_events: List[SyncEvent] = [] + if current_sync_event_data: + current_sync_events = [SyncEvent(**e) for e in json.loads(current_sync_event_data)] + current_sync_events.append(sync_event) + else: + current_sync_events = [sync_event] + redis.hset( + REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json.dumps([asdict(e) for e in current_sync_events]) + ) + except RedisError as e: + logger.exception(f"Redis Error while adding sync event (not critical): {e}") + except Exception as e: + logger.exception(f"Exception while adding sync event (not critical): {e}") def get_sync_events(hostnames: List[str]) -> SyncHistory: From a5c5c47592e3199fb55c2336ce38af777d92196f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 30 Jun 2023 16:30:54 +0200 Subject: [PATCH 121/169] create sync status events when devices are marked unsync. 
documentation for events --- docs/tutorials/index.rst | 1 + docs/tutorials/sync-status/index.rst | 41 +++++++++++++++++++++ src/cnaas_nms/api/device.py | 2 + src/cnaas_nms/api/interface.py | 4 +- src/cnaas_nms/api/linknet.py | 9 ++++- src/cnaas_nms/api/mgmtdomain.py | 9 ++++- src/cnaas_nms/db/device.py | 8 +++- src/cnaas_nms/db/git.py | 10 +++-- src/cnaas_nms/db/linknet.py | 3 ++ src/cnaas_nms/devicehandler/firmware.py | 27 ++++++++++---- src/cnaas_nms/devicehandler/init_device.py | 2 + src/cnaas_nms/devicehandler/sync_devices.py | 8 ++++ src/cnaas_nms/devicehandler/update.py | 2 + 13 files changed, 109 insertions(+), 17 deletions(-) create mode 100644 docs/tutorials/sync-status/index.rst diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst index 870eeaca..ff73e53b 100644 --- a/docs/tutorials/index.rst +++ b/docs/tutorials/index.rst @@ -8,3 +8,4 @@ Tutorials to help you get started with using and understanding CNaaS CNaaS Intro Change workflow + Synchronization status diff --git a/docs/tutorials/sync-status/index.rst b/docs/tutorials/sync-status/index.rst new file mode 100644 index 00000000..b9e31904 --- /dev/null +++ b/docs/tutorials/sync-status/index.rst @@ -0,0 +1,41 @@ +.. _sync_status_tutorial: + +Synchronization status +====================== + +All devices managed by CNaaS-NMS has a synchronization status that can be true +(meaning synchronized) or false (not synchronized). This status is true if NMS +thinks that all the device configuration is exactly as generated by NMS +according to the last settings, templates and API/DB data. If after refreshing +settings for example NMS detects that settings files for one device was changed, +that device will be marked as unsynchronized. + +If device configuration is changed via some out-of-band method from NMS like +a user logging in on to a device via SSH and making configuration changes, +NMS will not detect that so the device might still show synchronized: true. 
+ +Events causing devices to become unsynchronized: + +- Settings repo is updated, devices are marked unsynchronized depending on what files changed: + + * global → all devices becomes unsynchronized + * access/dist/core → all devices of this type become unsynchronized + * device/ → that hostname becomes unsynchronized + +- Templates repo is updated: + + * Based upon dependencies in mapping.yml and which files was updated devices become unsynchronized + +- syncto touches a device and discovers it's been modified outside of NMS (config hash mismatch) +- Interfaces are updated via device interfaces API +- ZTP of new device will affect neighbor devices +- Device is moved to UNMANAGED state +- Device upgrade (post_flight step) is performed (since v1.2.0) +- Mgmtdomain is added/removed/changed (since v1.2.0) +- Linknet added/removed via API (since v1.3.0) +- Commit confirm failed (since v1.5.0) + +Events causing devices to become synchronized: + +- Syncto liverun pushes new config to device +- Syncto dryrun noticing there is no diff to be applied diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 42cc7880..5027ffe6 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -30,6 +30,7 @@ ) from cnaas_nms.db.stackmember import Stackmember from cnaas_nms.devicehandler.nornir_helper import cnaas_init, inventory_selector +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.tools.log import get_logger from cnaas_nms.tools.security import get_jwt_identity, jwt_required @@ -238,6 +239,7 @@ def delete(self, device_id): try: for nei in dev.get_neighbors(session): nei.synchronized = False + add_sync_event(nei.hostname, "neighbor_deleted", get_jwt_identity()) except Exception as e: logger.warning("Could not mark neighbor as unsync after deleting {}: {}".format(dev.hostname, e)) try: diff --git a/src/cnaas_nms/api/interface.py b/src/cnaas_nms/api/interface.py 
index 75f21f38..9ec424f7 100644 --- a/src/cnaas_nms/api/interface.py +++ b/src/cnaas_nms/api/interface.py @@ -10,7 +10,8 @@ from cnaas_nms.db.settings import get_settings from cnaas_nms.devicehandler.interface_state import bounce_interfaces, get_interface_states from cnaas_nms.devicehandler.sync_devices import resolve_vlanid, resolve_vlanid_list -from cnaas_nms.tools.security import jwt_required +from cnaas_nms.devicehandler.sync_history import add_sync_event +from cnaas_nms.tools.security import get_jwt_identity, jwt_required from cnaas_nms.version import __api_version__ api = Namespace("device", description="API for handling interfaces", prefix="/api/{}".format(__api_version__)) @@ -236,6 +237,7 @@ def put(self, hostname): if updated: dev.synchronized = False + add_sync_event(hostname, "interface_updated", get_jwt_identity()) if errors: if data: diff --git a/src/cnaas_nms/api/linknet.py b/src/cnaas_nms/api/linknet.py index 047b3d49..008ab73f 100644 --- a/src/cnaas_nms/api/linknet.py +++ b/src/cnaas_nms/api/linknet.py @@ -10,8 +10,9 @@ from cnaas_nms.db.device import Device, DeviceType from cnaas_nms.db.linknet import Linknet from cnaas_nms.db.session import sqla_session +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.devicehandler.underlay import find_free_infra_linknet -from cnaas_nms.tools.security import jwt_required +from cnaas_nms.tools.security import get_jwt_identity, jwt_required from cnaas_nms.version import __api_version__ linknets_api = Namespace("linknets", description="API for handling linknets", prefix="/api/{}".format(__api_version__)) @@ -198,7 +199,9 @@ def delete(self): if not cur_linknet: return empty_result(status="error", data="No such linknet found in database"), 404 cur_linknet.device_a.synchronized = False + add_sync_event(cur_linknet.device_a.hostname, "linknet_deleted", get_jwt_identity()) cur_linknet.device_b.synchronized = False + add_sync_event(cur_linknet.device_b.hostname, "linknet_deleted", 
get_jwt_identity()) session.delete(cur_linknet) session.commit() return empty_result(status="success", data={"deleted_linknet": cur_linknet.as_dict()}), 200 @@ -225,7 +228,9 @@ def delete(self, linknet_id): instance: Linknet = session.query(Linknet).filter(Linknet.id == linknet_id).one_or_none() if instance: instance.device_a.synchronized = False + add_sync_event(instance.device_a.hostname, "linknet_deleted", get_jwt_identity()) instance.device_b.synchronized = False + add_sync_event(instance.device_b.hostname, "linknet_deleted", get_jwt_identity()) session.delete(instance) session.commit() return empty_result(status="success", data={"deleted_linknet": instance.as_dict()}), 200 @@ -262,7 +267,9 @@ def put(self, linknet_id): changed: bool = update_sqla_object(instance, json_data) if changed: instance.device_a.synchronized = False + add_sync_event(instance.device_a.hostname, "linknet_updated", get_jwt_identity()) instance.device_b.synchronized = False + add_sync_event(instance.device_b.hostname, "linknet_updated", get_jwt_identity()) return empty_result(status="success", data={"updated_linknet": instance.as_dict()}), 200 else: return empty_result(status="success", data={"unchanged_linknet": instance.as_dict()}), 200 diff --git a/src/cnaas_nms/api/mgmtdomain.py b/src/cnaas_nms/api/mgmtdomain.py index 0bf6bf56..f702122e 100644 --- a/src/cnaas_nms/api/mgmtdomain.py +++ b/src/cnaas_nms/api/mgmtdomain.py @@ -12,7 +12,8 @@ from cnaas_nms.db.mgmtdomain import Mgmtdomain from cnaas_nms.db.session import sqla_session from cnaas_nms.db.settings_fields import vlan_id_schema_optional -from cnaas_nms.tools.security import jwt_required +from cnaas_nms.devicehandler.sync_history import add_sync_event +from cnaas_nms.tools.security import get_jwt_identity, jwt_required from cnaas_nms.version import __api_version__ mgmtdomains_api = Namespace( @@ -98,7 +99,9 @@ def delete(self, mgmtdomain_id): instance: Mgmtdomain = session.query(Mgmtdomain).filter(Mgmtdomain.id == 
mgmtdomain_id).one_or_none() if instance: instance.device_a.synchronized = False + add_sync_event(instance.device_a.hostname, "mgmtdomain_deleted", get_jwt_identity()) instance.device_b.synchronized = False + add_sync_event(instance.device_b.hostname, "mgmtdomain_deleted", get_jwt_identity()) session.delete(instance) session.commit() return empty_result(status="success", data={"deleted_mgmtdomain": instance.as_dict()}), 200 @@ -125,7 +128,9 @@ def put(self, mgmtdomain_id): changed: bool = update_sqla_object(instance, json_data) if changed: instance.device_a.synchronized = False + add_sync_event(instance.device_a.hostname, "mgmtdomain_updated", get_jwt_identity()) instance.device_b.synchronized = False + add_sync_event(instance.device_b.hostname, "mgmtdomain_updated", get_jwt_identity()) return empty_result(status="success", data={"updated_mgmtdomain": instance.as_dict()}), 200 else: return empty_result(status="success", data={"unchanged_mgmtdomain": instance.as_dict()}), 200 @@ -209,7 +214,9 @@ def post(self): return empty_result("error", "Integrity error: {}".format(e)), 400 device_a.synchronized = False + add_sync_event(device_a.hostname, "mgmtdomain_created", get_jwt_identity()) device_b.synchronized = False + add_sync_event(device_b.hostname, "mgmtdomain_created", get_jwt_identity()) return empty_result(status="success", data={"added_mgmtdomain": new_mgmtd.as_dict()}), 200 else: errors.append( diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 4604311f..6135b8b3 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -16,6 +16,7 @@ import cnaas_nms.db.site from cnaas_nms.db.interface import Interface, InterfaceConfigType from cnaas_nms.db.stackmember import Stackmember +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.tools.event import add_event @@ -309,7 +310,9 @@ def valid_hostname(cls, hostname: str) -> bool: return all(hostname_part_re.match(x) for x in hostname.split(".")) 
@classmethod - def set_devtype_syncstatus(cls, session, devtype: DeviceType, platform: Optional[str] = None, syncstatus=False): + def set_devtype_syncstatus( + cls, session, devtype: DeviceType, by: str, platform: Optional[str] = None, job_id: Optional[int] = None + ): """Update sync status of devices of type devtype""" dev: Device if platform: @@ -319,7 +322,8 @@ def set_devtype_syncstatus(cls, session, devtype: DeviceType, platform: Optional else: dev_query = session.query(Device).filter(Device.device_type == devtype).all() for dev in dev_query: - dev.synchronized = syncstatus + dev.synchronized = False + add_sync_event(dev.hostname, "refresh_templates", by, job_id) @classmethod def device_create(cls, **kwargs) -> Device: diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index e8b99734..cc4055b5 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -14,6 +14,7 @@ from cnaas_nms.db.joblock import Joblock, JoblockError from cnaas_nms.db.session import redis_session, sqla_session from cnaas_nms.db.settings import DIR_STRUCTURE, SettingsSyntaxError, VlanConflictError, rebuild_settings_cache +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.tools.log import get_logger from git import InvalidGitRepositoryError, Repo from git.exc import GitCommandError, NoSuchPathError @@ -77,7 +78,7 @@ def refresh_repo(repo_type: RepoType = RepoType.TEMPLATES, scheduled_by: str = N if not Joblock.acquire_lock(session, name="devices", job_id=job_id): raise JoblockError("Unable to acquire lock for configuring devices") try: - result = _refresh_repo_task(repo_type) + result = _refresh_repo_task(repo_type, job_id=job_id) job.finish_time = datetime.datetime.utcnow() job.status = JobStatus.FINISHED job.result = {"message": result, "repository": repo_type.name} @@ -143,7 +144,7 @@ def reset_repo(local_repo: Repo, remote_repo_path: str): local_repo.head.reset(index=True, working_tree=True) -def _refresh_repo_task(repo_type: RepoType = 
RepoType.TEMPLATES) -> str: +def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES, job_id: Optional[int] = None) -> str: """Should only be called by refresh_repo function.""" if repo_type == RepoType.TEMPLATES: local_repo_path = app_settings.TEMPLATES_LOCAL @@ -236,11 +237,12 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES) -> str: with sqla_session() as session: devtype: DeviceType for devtype in updated_devtypes: - Device.set_devtype_syncstatus(session, devtype, syncstatus=False) + Device.set_devtype_syncstatus(session, devtype, ret, job_id) for hostname in updated_hostnames: dev: Device = session.query(Device).filter(Device.hostname == hostname).one_or_none() if dev: dev.synchronized = False + add_sync_event(hostname, "refresh_settings", ret, job_id) else: logger.warn("Settings updated for unknown device: {}".format(hostname)) @@ -252,7 +254,7 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES) -> str: with sqla_session() as session: devtype: DeviceType for devtype, platform in updated_devtypes: - Device.set_devtype_syncstatus(session, devtype, platform, syncstatus=False) + Device.set_devtype_syncstatus(session, devtype, ret, platform, job_id) return ret diff --git a/src/cnaas_nms/db/linknet.py b/src/cnaas_nms/db/linknet.py index abd4a508..3564d5e7 100644 --- a/src/cnaas_nms/db/linknet.py +++ b/src/cnaas_nms/db/linknet.py @@ -10,6 +10,7 @@ import cnaas_nms.db.base import cnaas_nms.db.device import cnaas_nms.db.site +from cnaas_nms.devicehandler.sync_history import add_sync_event class Linknet(cnaas_nms.db.base.Base): @@ -165,5 +166,7 @@ def create_linknet( new_linknet.ipv4_network = str(ipv4_network) if strict_check: dev_a.synchronized = False + add_sync_event(dev_a.hostname, "linknet_created") dev_b.synchronized = False + add_sync_event(dev_b.hostname, "linknet_created") return new_linknet diff --git a/src/cnaas_nms/devicehandler/firmware.py b/src/cnaas_nms/devicehandler/firmware.py index dcb9e9f9..34061d60 100644 
--- a/src/cnaas_nms/devicehandler/firmware.py +++ b/src/cnaas_nms/devicehandler/firmware.py @@ -11,6 +11,7 @@ from cnaas_nms.db.job import Job from cnaas_nms.db.session import redis_session, sqla_session from cnaas_nms.devicehandler.nornir_helper import NornirJobResult, cnaas_init, inventory_selector +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.scheduler.thread_data import set_thread_data from cnaas_nms.scheduler.wrapper import job_wrapper from cnaas_nms.tools.log import get_logger @@ -20,7 +21,7 @@ class FirmwareAlreadyActiveException(Exception): pass -def arista_pre_flight_check(task, job_id: Optional[str] = None) -> str: +def arista_pre_flight_check(task, job_id: Optional[int] = None) -> str: """ NorNir task to do some basic checks before attempting to upgrade a switch. @@ -56,13 +57,15 @@ def arista_pre_flight_check(task, job_id: Optional[str] = None) -> str: return "Pre-flight check done." -def arista_post_flight_check(task, post_waittime: int, job_id: Optional[str] = None) -> str: +def arista_post_flight_check(task, post_waittime: int, scheduled_by: str, job_id: Optional[int] = None) -> str: """ NorNir task to update device facts after a switch have been upgraded Args: task: NorNir task post_waittime: Time to wait before trying to gather facts + scheduled_by: Who scheduled the job + job_id: Job ID Returns: String, describing the result @@ -90,6 +93,7 @@ def arista_post_flight_check(task, post_waittime: int, job_id: Optional[str] = N else: dev.confhash = None dev.synchronized = False + add_sync_event(task.host.name, "firmware_upgrade", scheduled_by, job_id) dev.last_seen = datetime.datetime.utcnow() except Exception as e: logger.exception("Could not update OS version on device {}: {}".format(task.host.name, str(e))) @@ -98,7 +102,7 @@ def arista_post_flight_check(task, post_waittime: int, job_id: Optional[str] = N return "Post-flight, OS version updated from {} to {}.".format(prev_os_version, os_version) -def 
arista_firmware_download(task, filename: str, httpd_url: str, job_id: Optional[str] = None) -> str: +def arista_firmware_download(task, filename: str, httpd_url: str, job_id: Optional[int] = None) -> str: """ NorNir task to download firmware image from the HTTP server. @@ -106,6 +110,7 @@ def arista_firmware_download(task, filename: str, httpd_url: str, job_id: Option task: NorNir task filename: Name of the file to download httpd_url: Base URL to the HTTP server + job_id: Job ID Returns: String, describing the result @@ -157,13 +162,14 @@ def arista_firmware_download(task, filename: str, httpd_url: str, job_id: Option return "Firmware download done." -def arista_firmware_activate(task, filename: str, job_id: Optional[str] = None) -> str: +def arista_firmware_activate(task, filename: str, job_id: Optional[int] = None) -> str: """ NorNir task to modify the boot config for new firmwares. Args: task: NorNir task filename: Name of the new firmware image + job_id: Job ID Returns: String, describing the result @@ -208,12 +214,13 @@ def arista_firmware_activate(task, filename: str, job_id: Optional[str] = None) return "Firmware activate done." -def arista_device_reboot(task, job_id: Optional[str] = None) -> str: +def arista_device_reboot(task, job_id: Optional[int] = None) -> str: """ NorNir task to reboot a single device. Args: task: NorNir task. 
+ job_id: Job ID Returns: String, describing the result @@ -240,7 +247,8 @@ def arista_device_reboot(task, job_id: Optional[str] = None) -> str: def device_upgrade_task( task, - job_id: str, + job_id: int, + scheduled_by: str, filename: str, url: str, reboot: Optional[bool] = False, @@ -320,7 +328,9 @@ def device_upgrade_task( if post_flight and not already_active: logger.info("Running post-flight check on {}, delay start by {}s".format(task.host.name, post_waittime)) try: - res = task.run(task=arista_post_flight_check, post_waittime=post_waittime, job_id=job_id) + res = task.run( + task=arista_post_flight_check, post_waittime=post_waittime, scheduled_by=scheduled_by, job_id=job_id + ) except Exception as e: logger.exception("Failed to run post-flight check: {}".format(str(e))) else: @@ -340,7 +350,7 @@ def device_upgrade( group: Optional[str] = None, hostname: Optional[str] = None, url: Optional[str] = None, - job_id: Optional[str] = None, + job_id: Optional[int] = None, pre_flight: Optional[bool] = False, post_flight: Optional[bool] = False, post_waittime: Optional[int] = 600, @@ -378,6 +388,7 @@ def device_upgrade( nrresult = nr_filtered.run( task=device_upgrade_task, job_id=job_id, + scheduled_by=scheduled_by, download=download, filename=filename, url=url, diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index 857bee99..fad5ede5 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -27,6 +27,7 @@ from cnaas_nms.devicehandler.cert import arista_copy_cert from cnaas_nms.devicehandler.nornir_helper import NornirJobResult, get_jinja_env from cnaas_nms.devicehandler.sync_devices import confcheck_devices, populate_device_vars +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.devicehandler.update import set_facts, update_interfacedb_worker, update_linknets from cnaas_nms.plugins.pluginmanager import PluginManagerHandler from 
cnaas_nms.scheduler.scheduler import Scheduler @@ -855,6 +856,7 @@ def init_device_step2( dev: Device = session.query(Device).filter(Device.id == device_id).one() dev.state = DeviceState.MANAGED dev.synchronized = False + add_sync_event(hostname, "device_init", scheduled_by, job_id) set_facts(dev, facts) management_ip = dev.management_ip dev.dhcp_ip = None diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 7d2c0c46..b1c2ee5b 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -26,6 +26,7 @@ from cnaas_nms.devicehandler.changescore import calculate_score from cnaas_nms.devicehandler.get import calc_config_hash from cnaas_nms.devicehandler.nornir_helper import NornirJobResult, cnaas_init, get_jinja_env, inventory_selector +from cnaas_nms.devicehandler.sync_history import add_sync_event, remove_sync_events from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.scheduler.thread_data import set_thread_data from cnaas_nms.scheduler.wrapper import job_wrapper @@ -677,6 +678,7 @@ def confcheck_devices(session, hostnames: List[str], job_id=None): for hostname in nrresult.failed_hosts.keys(): dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = False + add_sync_event(hostname, "confighash", "unknown", job_id) raise Exception("Configuration hash check failed for {}".format(" ".join(nrresult.failed_hosts.keys()))) @@ -768,10 +770,12 @@ def confirm_devices( logger.debug("Setting device as unsync for failed commit-confirm on device {}".format(host)) dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = False + add_sync_event(host, "commit_confirm_failed", scheduled_by, job_id) dev.confhash = None else: dev: Device = session.query(Device).filter(Device.hostname == host).one() dev.synchronized = True + remove_sync_events(host) dev.last_seen = datetime.datetime.utcnow() 
logger.info("Releasing lock for devices from syncto job: {} (in commit-job {})".format(prev_job_id, job_id)) @@ -908,15 +912,18 @@ def sync_devices( if dry_run: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = False + add_sync_event(hostname, "syncto_dryrun", scheduled_by, job_id) dev.last_seen = datetime.datetime.utcnow() # if next job will commit, that job will mark synchronized on success elif get_confirm_mode(confirm_mode_override) != 2: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True + remove_sync_events(hostname) dev.last_seen = datetime.datetime.utcnow() for hostname in unchanged_hosts: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() dev.synchronized = True + remove_sync_events(hostname) dev.last_seen = datetime.datetime.utcnow() if not dry_run and get_confirm_mode(confirm_mode_override) != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) @@ -1039,5 +1046,6 @@ def apply_config( dev: Device = session.query(Device).filter(Device.hostname == hostname).one_or_none() dev.state = DeviceState.UNMANAGED dev.synchronized = False + add_sync_event(hostname, "apply_config", scheduled_by, job_id) return NornirJobResult(nrresult=nrresult) diff --git a/src/cnaas_nms/devicehandler/update.py b/src/cnaas_nms/devicehandler/update.py index 8a15111d..a5586ba8 100644 --- a/src/cnaas_nms/devicehandler/update.py +++ b/src/cnaas_nms/devicehandler/update.py @@ -18,6 +18,7 @@ verify_peer_iftype, ) from cnaas_nms.devicehandler.nornir_helper import NornirJobResult +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.devicehandler.underlay import find_free_infra_linknet from cnaas_nms.scheduler.jobresult import DictJobResult from cnaas_nms.scheduler.wrapper import job_wrapper @@ -137,6 +138,7 @@ def update_interfacedb( if result: dev.synchronized = False + add_sync_event(hostname, 
"update_interfacedb", scheduled_by, job_id) return DictJobResult(result={"interfaces": result}) From a200101ac97958992049ad1408ad00ff2878e4f9 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 3 Jul 2023 09:54:59 +0200 Subject: [PATCH 122/169] fix arguments when calling set_devtype_syncstatus without platform --- src/cnaas_nms/db/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index cc4055b5..c85d4294 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -237,7 +237,7 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES, job_id: Optiona with sqla_session() as session: devtype: DeviceType for devtype in updated_devtypes: - Device.set_devtype_syncstatus(session, devtype, ret, job_id) + Device.set_devtype_syncstatus(session, devtype, ret, job_id=job_id) for hostname in updated_hostnames: dev: Device = session.query(Device).filter(Device.hostname == hostname).one_or_none() if dev: From 72abe615b40c752b0ce408dd6651359037aa9af5 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 3 Jul 2023 11:26:43 +0200 Subject: [PATCH 123/169] Bugfix, make refresh repo git-diff only check file changes in current branch --- src/cnaas_nms/db/git.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index c85d4294..b53a8391 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -189,6 +189,9 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES, job_id: Optiona prev_commit = local_repo.commit().hexsha diff = local_repo.remotes.origin.pull() for item in diff: + if item.ref.remote_head != local_repo.head.ref.name: + continue + ret += "Commit {} by {} at {}\n".format( item.commit.name_rev, item.commit.committer, item.commit.committed_datetime ) From 787662dcfaab3c4e1cf32117351df1e73ca49e99 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 3 Jul 2023 16:01:08 +0200 Subject: [PATCH 124/169] 
Add API call to query synchistory events for a single device or all devices --- src/cnaas_nms/api/app.py | 2 ++ src/cnaas_nms/api/device.py | 25 ++++++++++++++++++++- src/cnaas_nms/api/tests/test_device.py | 8 +++++++ src/cnaas_nms/db/device.py | 10 +++++++-- src/cnaas_nms/db/git.py | 4 ++-- src/cnaas_nms/devicehandler/sync_history.py | 14 ++++++++---- 6 files changed, 54 insertions(+), 9 deletions(-) diff --git a/src/cnaas_nms/api/app.py b/src/cnaas_nms/api/app.py index 45f3dc00..3f60c734 100644 --- a/src/cnaas_nms/api/app.py +++ b/src/cnaas_nms/api/app.py @@ -17,6 +17,7 @@ device_discover_api, device_init_api, device_initcheck_api, + device_synchistory_api, device_syncto_api, device_update_facts_api, device_update_interfaces_api, @@ -114,6 +115,7 @@ def handle_error(self, e): api.add_namespace(device_update_facts_api) api.add_namespace(device_update_interfaces_api) api.add_namespace(device_cert_api) +api.add_namespace(device_synchistory_api) api.add_namespace(linknets_api) api.add_namespace(linknet_api) api.add_namespace(firmware_api) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 5027ffe6..dda14691 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -30,7 +30,7 @@ ) from cnaas_nms.db.stackmember import Stackmember from cnaas_nms.devicehandler.nornir_helper import cnaas_init, inventory_selector -from cnaas_nms.devicehandler.sync_history import add_sync_event +from cnaas_nms.devicehandler.sync_history import SyncHistory, add_sync_event, get_sync_events from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.tools.log import get_logger from cnaas_nms.tools.security import get_jwt_identity, jwt_required @@ -64,6 +64,9 @@ device_cert_api = Namespace( "device_cert", description="API to handle device certificates", prefix="/api/{}".format(__api_version__) ) +device_synchistory_api = Namespace( + "device_synchistory", description="API to query sync history for devices", 
prefix="/api/{}".format(__api_version__) +) device_model = device_api.model( @@ -1090,6 +1093,25 @@ def format_errors(cls, errors): return return_errors +class DeviceSyncHistoryApi(Resource): + @jwt_required + @device_synchistory_api.param("hostname") + def get(self): + args = request.args + result = empty_result() + result["data"] = {"hostnames": {}} + + if "hostname" in args: + if not Device.valid_hostname(args["hostname"]): + return empty_result(status="error", data="Invalid hostname specified"), 400 + sync_history: SyncHistory = get_sync_events([args["hostname"]]) + else: + sync_history: SyncHistory = get_sync_events() + + result["data"]["hostnames"] = sync_history.asdict() + return result + + # Devices device_api.add_resource(DeviceByIdApi, "/") device_api.add_resource(DeviceByHostnameApi, "/") @@ -1106,4 +1128,5 @@ def format_errors(cls, errors): device_update_interfaces_api.add_resource(DeviceUpdateInterfacesApi, "") device_cert_api.add_resource(DeviceCertApi, "") device_api.add_resource(DeviceStackmembersApi, "//stackmember") +device_synchistory_api.add_resource(DeviceSyncHistoryApi, "") # device//current_config diff --git a/src/cnaas_nms/api/tests/test_device.py b/src/cnaas_nms/api/tests/test_device.py index db084038..e56244c1 100644 --- a/src/cnaas_nms/api/tests/test_device.py +++ b/src/cnaas_nms/api/tests/test_device.py @@ -218,6 +218,14 @@ def test_put_stackmembers_dupe_hardware_id(self): result = self.client.put(f"/api/v1.0/device/{self.hostname}/stackmember", json=stackmember_data) self.assertEqual(result.status_code, 400) + def test_get_synchistory(self): + result = self.client.get("/api/v1.0/device_synchistory", query_string={"hostname": "eosaccess"}) + self.assertEqual(result.status_code, 200, "Get synchistory for single device failed") + self.assertTrue("data" in result.json) + result = self.client.get("/api/v1.0/device_synchistory") + self.assertEqual(result.status_code, 200, "Get synchistory for all devices failed") + self.assertTrue("data" in 
result.json) + if __name__ == "__main__": unittest.main() diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 6135b8b3..eea256da 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -311,7 +311,13 @@ def valid_hostname(cls, hostname: str) -> bool: @classmethod def set_devtype_syncstatus( - cls, session, devtype: DeviceType, by: str, platform: Optional[str] = None, job_id: Optional[int] = None + cls, + session, + devtype: DeviceType, + by: str, + repo_type: str, + platform: Optional[str] = None, + job_id: Optional[int] = None, ): """Update sync status of devices of type devtype""" dev: Device @@ -323,7 +329,7 @@ def set_devtype_syncstatus( dev_query = session.query(Device).filter(Device.device_type == devtype).all() for dev in dev_query: dev.synchronized = False - add_sync_event(dev.hostname, "refresh_templates", by, job_id) + add_sync_event(dev.hostname, f"refresh_{repo_type}", by, job_id) @classmethod def device_create(cls, **kwargs) -> Device: diff --git a/src/cnaas_nms/db/git.py b/src/cnaas_nms/db/git.py index b53a8391..5e8fdd0e 100644 --- a/src/cnaas_nms/db/git.py +++ b/src/cnaas_nms/db/git.py @@ -240,7 +240,7 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES, job_id: Optiona with sqla_session() as session: devtype: DeviceType for devtype in updated_devtypes: - Device.set_devtype_syncstatus(session, devtype, ret, job_id=job_id) + Device.set_devtype_syncstatus(session, devtype, ret, "settings", job_id=job_id) for hostname in updated_hostnames: dev: Device = session.query(Device).filter(Device.hostname == hostname).one_or_none() if dev: @@ -257,7 +257,7 @@ def _refresh_repo_task(repo_type: RepoType = RepoType.TEMPLATES, job_id: Optiona with sqla_session() as session: devtype: DeviceType for devtype, platform in updated_devtypes: - Device.set_devtype_syncstatus(session, devtype, ret, platform, job_id) + Device.set_devtype_syncstatus(session, devtype, ret, "templates", platform, job_id) return ret diff 
--git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index 00a1e79c..ed374a8e 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -26,6 +26,9 @@ class SyncHistory: history: Dict[str, List[SyncEvent]] + def asdict(self) -> Dict[str, List]: + return {k: [asdict(e) for e in v] for (k, v) in self.history.items()} + def redis_dump(self) -> Dict[str, str]: # redis doesn't support nested datatypes, so save inner list as string of json instead return {k: json.dumps([asdict(e) for e in v]) for (k, v) in self.history.items()} @@ -61,14 +64,17 @@ def add_sync_event(hostname: str, cause: str, by: Optional[str] = None, job_id: logger.exception(f"Exception while adding sync event (not critical): {e}") -def get_sync_events(hostnames: List[str]) -> SyncHistory: +def get_sync_events(hostnames: Optional[List[str]] = None) -> SyncHistory: ret = SyncHistory(history={}) sync_history = SyncHistory(history={}) with redis_session() as redis: sync_history.redis_load(redis.hgetall(REDIS_SYNC_HISTORY_KEYNAME)) - for hostname, events in sync_history.history.items(): - if hostname in hostnames: - ret.history[hostname] = events + if hostnames: + for hostname, events in sync_history.history.items(): + if hostname in hostnames: + ret.history[hostname] = events + else: + ret = sync_history return ret From ea3181e467fac80e6cffe6c1fe7527954b492133 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 4 Jul 2023 10:29:19 +0200 Subject: [PATCH 125/169] API call to manually add synchistory event. 
clear synchistory event if device PUT API is called with synchronized=true --- src/cnaas_nms/api/device.py | 35 +++++++++++++++++++-- src/cnaas_nms/devicehandler/sync_history.py | 18 +++++++++-- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index dda14691..6a285713 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -13,7 +13,7 @@ import cnaas_nms.devicehandler.sync_devices import cnaas_nms.devicehandler.underlay import cnaas_nms.devicehandler.update -from cnaas_nms.api.generic import build_filter, empty_result, pagination_headers +from cnaas_nms.api.generic import build_filter, empty_result, pagination_headers, parse_pydantic_error from cnaas_nms.api.models.stackmembers_model import StackmembersModel from cnaas_nms.app_settings import api_settings from cnaas_nms.db.device import Device, DeviceState, DeviceType @@ -30,7 +30,13 @@ ) from cnaas_nms.db.stackmember import Stackmember from cnaas_nms.devicehandler.nornir_helper import cnaas_init, inventory_selector -from cnaas_nms.devicehandler.sync_history import SyncHistory, add_sync_event, get_sync_events +from cnaas_nms.devicehandler.sync_history import ( + NewSyncEventModel, + SyncHistory, + add_sync_event, + get_sync_events, + remove_sync_events, +) from cnaas_nms.scheduler.scheduler import Scheduler from cnaas_nms.tools.log import get_logger from cnaas_nms.tools.security import get_jwt_identity, jwt_required @@ -188,6 +194,16 @@ }, ) +synchistory_event_model = device_synchistory_api.model( + "add_event", + { + "hostname": fields.String(required=True), + "cause": fields.String(required=True), + "time": fields.Float(required=False), + "by": fields.String(required=True), + }, +) + def device_data_postprocess(device_list: List[Device]) -> List[dict]: device_primary_group = get_device_primary_groups() @@ -287,6 +303,8 @@ def put(self, device_id): logger.error(msg) session.rollback() return empty_result(status="error", 
data=msg), 500 + if "synchronized" in json_data and json_data["synchronized"]: + remove_sync_events(dev.hostname) session.commit() update_device_primary_groups() dev_dict = device_data_postprocess([dev])[0] @@ -1111,6 +1129,19 @@ def get(self): result["data"]["hostnames"] = sync_history.asdict() return result + @jwt_required + @device_synchistory_api.expect(device_synchistory_api) + def post(self): + try: + validated_json_data = NewSyncEventModel(**request.get_json()).dict() + except ValidationError as e: + return empty_result("error", parse_pydantic_error(e, NewSyncEventModel, request.get_json())), 400 + try: + add_sync_event(**validated_json_data) + except Exception as e: + return empty_result("error", str(e)) + return empty_result(data=validated_json_data) + # Devices device_api.add_resource(DeviceByIdApi, "/") diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index ed374a8e..772543f1 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -3,6 +3,7 @@ from dataclasses import asdict, dataclass from typing import Dict, List, Optional +from pydantic import BaseModel, Field from redis.exceptions import RedisError from cnaas_nms.db.session import redis_session @@ -12,10 +13,17 @@ logger = get_logger() +class NewSyncEventModel(BaseModel): + hostname: str + cause: str + timestamp: Optional[float] = Field(default_factory=time.time) + by: str + + @dataclass(frozen=True) class SyncEvent: cause: str - time: float + timestamp: float by: str job_id: Optional[int] @@ -37,11 +45,15 @@ def redis_load(self, redis_dict: Dict[str, str]): self.history = {k: [SyncEvent(**e) for e in json.loads(v)] for (k, v) in redis_dict.items()} -def add_sync_event(hostname: str, cause: str, by: Optional[str] = None, job_id: Optional[int] = None): +def add_sync_event( + hostname: str, cause: str, timestamp: Optional[float] = None, by: Optional[str] = None, job_id: Optional[int] = None +): 
try: if not by: by = "unknown" - sync_event = SyncEvent(cause, time.time(), by, job_id) + if not timestamp: + timestamp = time.time() + sync_event = SyncEvent(cause, timestamp, by, job_id) with redis_session() as redis: if not redis.exists(REDIS_SYNC_HISTORY_KEYNAME): new_history = SyncHistory(history={hostname: [sync_event]}) From c0982ab94a2d0b2e50e3bda5b4d872de58d03ca2 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 4 Jul 2023 11:07:52 +0200 Subject: [PATCH 126/169] Add more unittests and docs for synchistory events --- docs/apiref/syncto.rst | 57 ++++++++++++++++++++++++-- src/cnaas_nms/api/device.py | 8 +++- src/cnaas_nms/api/tests/test_device.py | 37 +++++++++++++++++ 3 files changed, 98 insertions(+), 4 deletions(-) diff --git a/docs/apiref/syncto.rst b/docs/apiref/syncto.rst index 662266dc..302ed812 100644 --- a/docs/apiref/syncto.rst +++ b/docs/apiref/syncto.rst @@ -43,7 +43,8 @@ version it's supported and enabled by default using mode 1. .. _commit_confirm_modes: -Commit confirm modes: +**Commit confirm modes:** + - 0 = No confirm commit (default up to version 1.4) - 1 = Commit is immediately confirmed for each device when that device is configured (default from version 1.5) @@ -55,8 +56,7 @@ Commit confirm modes: Commit confirm mode can be specified in the configuration file, but it's also possible to override that setting for a specific job using the API argument confirm_mode (see below). -Arguments: ----------- +**Arguments:** - hostname: Optional, the hostname of a device - device_type: Optional, a device type (access, dist or core) @@ -83,3 +83,54 @@ Arguments: If neither hostname or device_type is specified all devices that needs to be sycnhronized will be selected. + +Sync history +------------ + +When an API call causes a device to become unsynchronized a synchronization event is created +in the synchistory log. You can query or manually add events from this history using the API. 
+
+Get synchistory events:
+
+::
+
+  curl https://hostname/api/v1.0/device_synchistory?hostname=eosaccess
+
+Example output:
+
+::
+
+  {
+      "status": "success",
+      "data": {
+          "hostnames": {
+              "eosaccess": [
+                  {
+                      "cause": "refresh_settings",
+                      "timestamp": 1688458956.684019,
+                      "by": "indy",
+                      "job_id": 123
+                  }
+              ]
+          }
+      }
+  }
+
+If the query parameter "hostname" is left out the API will return events for
+all devices.
+
+"cause" is a text string reference to the thing that caused the device to become
+unsynchronized. For more details on events see :ref:`sync_status_tutorial`.
+"timestamp" is a floating point number representing the seconds since Unix epoch (UTC).
+"by" is a string referring to what user triggered the event. "job_id" is an integer
+referring to a job if this event was triggered by a job, or otherwise it's null.
+
+Manually adding a synchistory event:
+
+::
+
+  curl https://hostname/api/v1.0/device_synchistory -d '{"hostname": "eosaccess", "cause": "oob", "by": "indy"}' \
+  -X POST -H "Content-Type: application/json"
+
+The "timestamp" parameter can optionally be specified as a floating point number of
+seconds since Unix epoch (UTC). If not provided the current time will be used.
diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 6a285713..69c6d9c4 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -1136,10 +1136,16 @@ def post(self): validated_json_data = NewSyncEventModel(**request.get_json()).dict() except ValidationError as e: return empty_result("error", parse_pydantic_error(e, NewSyncEventModel, request.get_json())), 400 + with sqla_session() as session: + device_instance = ( + session.query(Device).filter(Device.hostname == validated_json_data["hostname"]).one_or_none() + ) + if not device_instance: + return empty_result("error", "Device not found"), 400 try: add_sync_event(**validated_json_data) except Exception as e: - return empty_result("error", str(e)) + return empty_result("error", str(e)), 500 return empty_result(data=validated_json_data) diff --git a/src/cnaas_nms/api/tests/test_device.py b/src/cnaas_nms/api/tests/test_device.py index e56244c1..a54c26e0 100644 --- a/src/cnaas_nms/api/tests/test_device.py +++ b/src/cnaas_nms/api/tests/test_device.py @@ -218,6 +218,43 @@ def test_put_stackmembers_dupe_hardware_id(self): result = self.client.put(f"/api/v1.0/device/{self.hostname}/stackmember", json=stackmember_data) self.assertEqual(result.status_code, 400) + def test_put_synchistory_event_valid(self): + data = { + "hostname": "eosaccess", + "cause": "unittest_cause", + "by": "unittest_user", + } + result = self.client.post("/api/v1.0/device_synchistory", json=data) + json_data = json.loads(result.data.decode()) + self.assertEqual(result.status_code, 200, msg=json_data) + self.assertEqual(len(json_data["data"].keys()), 4, msg=json_data) + + def test_put_synchistory_event_no_hostname(self): + data = { + "cause": "unittest_cause", + "by": "unittest_user", + } + result = self.client.post("/api/v1.0/device_synchistory", json=data) + self.assertEqual(result.status_code, 400) + + def test_put_synchistory_event_invalid_hostname(self): + data = { + "hostname": 
"devicethatdoesnotexist", + "cause": "unittest_cause", + "by": "unittest_user", + } + result = self.client.post("/api/v1.0/device_synchistory", json=data) + self.assertEqual(result.status_code, 400) + + def test_put_synchistory_event_invalid_timestamp(self): + data = { + "cause": "unittest_cause", + "by": "unittest_user", + "timestamp": "2023", + } + result = self.client.post("/api/v1.0/device_synchistory", json=data) + self.assertEqual(result.status_code, 400) + def test_get_synchistory(self): result = self.client.get("/api/v1.0/device_synchistory", query_string={"hostname": "eosaccess"}) self.assertEqual(result.status_code, 200, "Get synchistory for single device failed") From 0255e1a35a13a8f5e0f0c11f92a806fc9f4b0ec7 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 4 Jul 2023 14:26:25 +0200 Subject: [PATCH 127/169] fix add_sync_event argument order --- src/cnaas_nms/devicehandler/sync_history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index 772543f1..2fe5de0d 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -46,7 +46,7 @@ def redis_load(self, redis_dict: Dict[str, str]): def add_sync_event( - hostname: str, cause: str, timestamp: Optional[float] = None, by: Optional[str] = None, job_id: Optional[int] = None + hostname: str, cause: str, by: Optional[str] = None, job_id: Optional[int] = None, timestamp: Optional[float] = None ): try: if not by: From 303820562ad90a423ebc970f69efbf23ce884420 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 4 Jul 2023 14:31:22 +0200 Subject: [PATCH 128/169] don't add unsync event when dryrun discovers diff on already unsync device --- src/cnaas_nms/devicehandler/sync_devices.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 
b1c2ee5b..b890d01f 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -911,8 +911,9 @@ def sync_devices( for hostname in changed_hosts: if dry_run: dev: Device = session.query(Device).filter(Device.hostname == hostname).one() - dev.synchronized = False - add_sync_event(hostname, "syncto_dryrun", scheduled_by, job_id) + if dev.synchronized: + dev.synchronized = False + add_sync_event(hostname, "syncto_dryrun", scheduled_by, job_id) dev.last_seen = datetime.datetime.utcnow() # if next job will commit, that job will mark synchronized on success elif get_confirm_mode(confirm_mode_override) != 2: From 7fe374da61488f53124b458f22ffb3a2b2b634e0 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 31 Jul 2023 09:56:41 +0200 Subject: [PATCH 129/169] send websocket message for sync events --- src/cnaas_nms/devicehandler/sync_history.py | 7 ++++--- src/cnaas_nms/run.py | 2 ++ src/cnaas_nms/tools/event.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index 2fe5de0d..987e0536 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -7,6 +7,7 @@ from redis.exceptions import RedisError from cnaas_nms.db.session import redis_session +from cnaas_nms.tools.event import add_event from cnaas_nms.tools.log import get_logger REDIS_SYNC_HISTORY_KEYNAME = "sync_history" @@ -67,9 +68,9 @@ def add_sync_event( current_sync_events.append(sync_event) else: current_sync_events = [sync_event] - redis.hset( - REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json.dumps([asdict(e) for e in current_sync_events]) - ) + json_data = json.dumps([asdict(e) for e in current_sync_events]) + redis.hset(REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json_data) + add_event(event_type="sync", json_data=json_data) except RedisError as e: logger.exception(f"Redis Error 
while adding sync event (not critical): {e}") except Exception as e: diff --git a/src/cnaas_nms/run.py b/src/cnaas_nms/run.py index 38158341..a83e4ede 100644 --- a/src/cnaas_nms/run.py +++ b/src/cnaas_nms/run.py @@ -106,6 +106,8 @@ def emit_redis_event(event): socketio_emit(event["message"], loglevel_to_rooms(event["level"])) elif event["type"] == "update": socketio_emit(json.loads(event["json"]), ["update_{}".format(event["update_type"])]) + elif event["type"] == "sync": + socketio_emit(json.loads(event["json"]), ["sync"]) except Exception: # noqa: S110 pass diff --git a/src/cnaas_nms/tools/event.py b/src/cnaas_nms/tools/event.py index 755dd1c5..09b2b176 100644 --- a/src/cnaas_nms/tools/event.py +++ b/src/cnaas_nms/tools/event.py @@ -10,6 +10,18 @@ def add_event( update_type: Optional[str] = None, json_data: Optional[str] = None, ): + """ + + Args: + message: used for type "log", string with log message + event_type: Can be one of "log", "update" or "sync" + level: + update_type: + json_data: used with "update" or "sync", contains updated object or sync event + + Returns: + + """ with redis_session() as redis: try: send_data = {"type": event_type, "level": level} @@ -18,6 +30,8 @@ def add_event( elif event_type == "update": send_data["update_type"] = update_type send_data["json"] = json_data + elif event_type == "sync": + send_data["json"] = json_data redis.xadd("events", send_data, maxlen=100) except Exception as e: print("Error in add_event: {}".format(e)) From f3ed2b6ba928e9aa676eac1e1b218c7ebbfa2589 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 31 Jul 2023 14:21:38 +0200 Subject: [PATCH 130/169] join sync room if sync key exists with value 'all' --- src/cnaas_nms/api/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/api/app.py b/src/cnaas_nms/api/app.py index 3f60c734..a69d0421 100644 --- a/src/cnaas_nms/api/app.py +++ b/src/cnaas_nms/api/app.py @@ -152,6 +152,8 @@ def socketio_on_events(data): room = data["loglevel"] elif 
"update" in data and data["update"] in ["device", "job"]: room = "update_{}".format(data["update"]) + elif "sync" in data and data["sync"] == "all": + room = "sync" else: return False # TODO: how to send error message to client? From 6d8bb5a6cea6a4dd9b148bf121cce2c6c933aa92 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 31 Jul 2023 14:36:47 +0200 Subject: [PATCH 131/169] send hostname and only new syncevent data --- src/cnaas_nms/devicehandler/sync_history.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_history.py b/src/cnaas_nms/devicehandler/sync_history.py index 987e0536..3ac9202b 100644 --- a/src/cnaas_nms/devicehandler/sync_history.py +++ b/src/cnaas_nms/devicehandler/sync_history.py @@ -70,7 +70,10 @@ def add_sync_event( current_sync_events = [sync_event] json_data = json.dumps([asdict(e) for e in current_sync_events]) redis.hset(REDIS_SYNC_HISTORY_KEYNAME, key=hostname, value=json_data) - add_event(event_type="sync", json_data=json_data) + add_event( + event_type="sync", + json_data=json.dumps({"syncevent_hostname": hostname, "syncevent_data": asdict(sync_event)}), + ) except RedisError as e: logger.exception(f"Redis Error while adding sync event (not critical): {e}") except Exception as e: From 412d158489408373e91200b8b3060b98151df29b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 1 Aug 2023 09:58:54 +0200 Subject: [PATCH 132/169] Make device unsynchronized and remove sync events when device is moved to unmanaged state --- src/cnaas_nms/db/device.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index eea256da..86e3ad09 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -16,7 +16,7 @@ import cnaas_nms.db.site from cnaas_nms.db.interface import Interface, InterfaceConfigType from cnaas_nms.db.stackmember import Stackmember -from cnaas_nms.devicehandler.sync_history import 
add_sync_event +from cnaas_nms.devicehandler.sync_history import add_sync_event, remove_sync_events from cnaas_nms.tools.event import add_event @@ -347,6 +347,9 @@ def device_update(self, **kwargs): if error != []: return error for field in data: + if field == "state" and data[field] == DeviceState.UNMANAGED: + remove_sync_events(self.hostname) + self.synchronized = False setattr(self, field, data[field]) @classmethod From d01376ba315b360b268beeeb78e53d1456b23ff2 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 1 Aug 2023 10:02:24 +0200 Subject: [PATCH 133/169] Add was_unmanaged syncevent if device is moved from unmanaged to managed --- src/cnaas_nms/db/device.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 86e3ad09..bbeb9e98 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -350,6 +350,8 @@ def device_update(self, **kwargs): if field == "state" and data[field] == DeviceState.UNMANAGED: remove_sync_events(self.hostname) self.synchronized = False + elif field == "state" and data[field] == DeviceState.MANAGED and self.state == DeviceState.UNMANAGED: + add_sync_event(self.hostname, "was_unmanaged") setattr(self, field, data[field]) @classmethod From e78002be3845bda14ec0b9c9661841442862a7fa Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 1 Aug 2023 10:13:28 +0200 Subject: [PATCH 134/169] Make was_unmanaged syncevent store the 'by' user --- src/cnaas_nms/api/device.py | 7 +++++++ src/cnaas_nms/db/device.py | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 69c6d9c4..c29e8c9d 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -281,6 +281,7 @@ def put(self, device_id): json_data = request.get_json() with sqla_session() as session: dev: Device = session.query(Device).filter(Device.id == device_id).one_or_none() + dev_prev_state: DeviceState = dev.state 
if not dev: return empty_result(status="error", data=f"No device with id {device_id}"), 404 @@ -305,6 +306,12 @@ def put(self, device_id): return empty_result(status="error", data=msg), 500 if "synchronized" in json_data and json_data["synchronized"]: remove_sync_events(dev.hostname) + if ( + "state" in json_data + and json_data["state"].upper() == "UNMANAGED" + and dev_prev_state == DeviceState.MANAGED + ): + add_sync_event(dev.hostname, "was_unmanaged", by=get_jwt_identity()) session.commit() update_device_primary_groups() dev_dict = device_data_postprocess([dev])[0] diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index bbeb9e98..86e3ad09 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -350,8 +350,6 @@ def device_update(self, **kwargs): if field == "state" and data[field] == DeviceState.UNMANAGED: remove_sync_events(self.hostname) self.synchronized = False - elif field == "state" and data[field] == DeviceState.MANAGED and self.state == DeviceState.UNMANAGED: - add_sync_event(self.hostname, "was_unmanaged") setattr(self, field, data[field]) @classmethod From 1dba7dcf70061bbc9dfc5412b1aa30a617144b67 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 16 Aug 2023 15:49:42 +0200 Subject: [PATCH 135/169] use correct commit mode override values inside push_sync_device task --- src/cnaas_nms/devicehandler/sync_devices.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index b890d01f..a4035661 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -478,11 +478,11 @@ def napalm_confirm_commit(task, job_id: int, prev_job_id: int): def push_sync_device( task, + confirm_mode: int, dry_run: bool = True, generate_only: bool = False, job_id: Optional[str] = None, scheduled_by: Optional[str] = None, - confirm_mode_override: Optional[int] = 
None, ): """ Nornir task to generate config and push to device @@ -494,7 +494,7 @@ def push_sync_device( even do dry_run compare to running config job_id: Job ID integer scheduled_by: username of users that scheduled job - confirm_mode_override: integer to specify commit confirm mode + confirm_mode: integer to specify commit confirm mode Returns: """ @@ -550,19 +550,15 @@ def push_sync_device( } if dry_run: task_args["task"] = napalm_configure - elif api_settings.COMMIT_CONFIRMED_MODE == 0: + elif confirm_mode == 0: task_args["task"] = napalm_configure else: task_args["task"] = napalm_configure_confirmed task_args["job_id"] = job_id - task_args["confirm_mode_override"] = confirm_mode_override - logger.debug( - "Commit confirm mode for host {}: {} (dry_run: {})".format( - task.host.name, api_settings.COMMIT_CONFIRMED_MODE, dry_run - ) - ) + task_args["confirm_mode_override"] = confirm_mode + logger.debug("Commit confirm mode for host {}: {} (dry_run: {})".format(task.host.name, confirm_mode, dry_run)) task.run(**task_args) - if api_settings.COMMIT_CONFIRMED_MODE != 2: + if confirm_mode != 2: task.host.close_connection("napalm") if task.results[1].diff: @@ -593,7 +589,7 @@ def generate_only(hostname: str) -> (str, dict): if len(nr_filtered.inventory.hosts) != 1: raise ValueError("Invalid hostname: {}".format(hostname)) try: - nrresult = nr_filtered.run(task=push_sync_device, generate_only=True) + nrresult = nr_filtered.run(task=push_sync_device, generate_only=True, confirm_mode=0) if nrresult[hostname][0].failed: raise Exception( "Could not generate config for device {}: {}".format(hostname, nrresult[hostname][0].result) @@ -859,7 +855,7 @@ def sync_devices( task=push_sync_device, dry_run=dry_run, job_id=job_id, - confirm_mode_override=get_confirm_mode(confirm_mode_override), + confirm_mode=get_confirm_mode(confirm_mode_override), ) except Exception as e: logger.exception("Exception while synchronizing devices: {}".format(str(e))) From 
96b0a677a856ac931b62573da08c84bcd50145b4 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 16 Aug 2023 16:26:07 +0200 Subject: [PATCH 136/169] fix confirm_mode option for syncto API --- src/cnaas_nms/api/device.py | 2 +- src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index c29e8c9d..494e02a7 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -655,7 +655,7 @@ def post(self): if "ticket_ref" in json_data and isinstance(json_data["ticket_ref"], str): kwargs["job_ticket_ref"] = json_data["ticket_ref"] if "confirm_mode" in json_data and isinstance(json_data["confirm_mode"], int): - if 0 >= json_data["confirm_mode"] >= 2: + if 0 <= json_data["confirm_mode"] <= 2: kwargs["confirm_mode_override"] = json_data["confirm_mode"] else: return ( diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index a4035661..8c4819b8 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -539,7 +539,7 @@ def push_sync_device( "Synchronize device config for host: {} ({}:{})".format(task.host.name, task.host.hostname, task.host.port) ) - if api_settings.COMMIT_CONFIRMED_MODE != 2: + if confirm_mode != 2: task.host.open_connection("napalm", configuration=task.nornir.config) task_args = { "name": "Sync device config", From 97d29e46664afe0bbfed965c04373b0e8d91dc49 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 21 Aug 2023 09:41:41 +0200 Subject: [PATCH 137/169] Make factory default device also mark neighbors as unsync, and rollback if unsuccessful in deleting from database. 
Do logging with job_id --- src/cnaas_nms/devicehandler/erase.py | 41 +++++++++++++++++++--------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/cnaas_nms/devicehandler/erase.py b/src/cnaas_nms/devicehandler/erase.py index 577d697b..023bff8c 100644 --- a/src/cnaas_nms/devicehandler/erase.py +++ b/src/cnaas_nms/devicehandler/erase.py @@ -1,24 +1,26 @@ +from typing import Optional + from nornir_netmiko.tasks import netmiko_send_command from nornir_utils.plugins.functions import print_result +from sqlalchemy.exc import IntegrityError import cnaas_nms.devicehandler.nornir_helper from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.session import sqla_session from cnaas_nms.devicehandler.nornir_helper import NornirJobResult +from cnaas_nms.devicehandler.sync_history import add_sync_event from cnaas_nms.scheduler.wrapper import job_wrapper from cnaas_nms.tools.log import get_logger -logger = get_logger() - -def device_erase_task(task, hostname: str) -> str: +def device_erase_task(task, hostname: str, job_id: int) -> str: + logger = get_logger() try: - res = task.run(netmiko_send_command, command_string="enable", expect_string=".*#", name="Enable") - + task.run(netmiko_send_command, command_string="enable", expect_string=".*#", name="Enable") res = task.run(netmiko_send_command, command_string="write erase now", expect_string=".*#", name="Write rase") print_result(res) except Exception as e: - logger.info("Failed to factory default device {}, reason: {}".format(task.host.name, e)) + logger.exception("Failed to factory default device {}, reason: {}".format(task.host.name, e)) raise Exception("Factory default device") # Remove cnaas device certificates if they are found @@ -48,8 +50,8 @@ def device_erase_task(task, hostname: str) -> str: @job_wrapper -def device_erase(device_id: int = None, job_id: int = None) -> NornirJobResult: - +def device_erase(device_id: int = None, job_id: int = None, scheduled_by: Optional[str] = 
None) -> NornirJobResult: + logger = get_logger() with sqla_session() as session: dev: Device = session.query(Device).filter(Device.id == device_id).one_or_none() if dev: @@ -69,13 +71,13 @@ def device_erase(device_id: int = None, job_id: int = None) -> NornirJobResult: nr_filtered = nr.filter(name=hostname) device_list = list(nr_filtered.inventory.hosts.keys()) - logger.info("Device selected: {}".format(device_list)) + logger.info("Device selected for factory default: {}".format(device_list)) try: - nrresult = nr_filtered.run(task=device_erase_task, hostname=hostname) + nrresult = nr_filtered.run(task=device_erase_task, hostname=hostname, job_id=job_id) print_result(nrresult) except Exception as e: - logger.exception("Exception while erasing device: {}".format(str(e))) + logger.exception("Exception while doing factory default of device: {}".format(str(e))) return NornirJobResult(nrresult=nrresult) failed_hosts = list(nrresult.failed_hosts.keys()) @@ -88,7 +90,20 @@ def device_erase(device_id: int = None, job_id: int = None) -> NornirJobResult: if failed_hosts == []: with sqla_session() as session: dev: Device = session.query(Device).filter(Device.id == device_id).one_or_none() - session.delete(dev) - session.commit() + try: + for nei in dev.get_neighbors(session): + nei.synchronized = False + add_sync_event(nei.hostname, "neighbor_deleted", scheduled_by) + except Exception as e: + logger.warning("Could not mark neighbor as unsync after deleting {}: {}".format(dev.hostname, e)) + try: + session.delete(dev) + session.commit() + except IntegrityError as e: + session.rollback() + logger.exception("Could not delete device because existing references: {}".format(e)) + except Exception as e: + session.rollback() + logger.exception("Could not delete device: {}".format(e)) return NornirJobResult(nrresult=nrresult) From 4bb5825347598592ed0483acf92d85940c80ae6d Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 21 Aug 2023 11:11:01 +0200 Subject: [PATCH 138/169] 
return job_id as separate field when submitting device delet job --- src/cnaas_nms/api/device.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 494e02a7..1109fd9e 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -248,7 +248,9 @@ def delete(self, device_id): scheduled_by=get_jwt_identity(), kwargs={"device_id": device_id}, ) - return empty_result(data="Scheduled job {} to factory default device".format(job_id)) + res = empty_result(data="Scheduled job {} to factory default device".format(job_id)) + res["job_id"] = job_id + return res elif not isinstance(json_data["factory_default"], bool): return empty_result(status="error", data="Argument factory_default must be boolean"), 400 with sqla_session() as session: From 6ab80bf6e2ae2f7f567c5bd2d3bf7fd35e26aadb Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 21 Aug 2023 13:23:20 +0200 Subject: [PATCH 139/169] return http status 500 for device delete exceptions --- src/cnaas_nms/api/device.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 1109fd9e..f4f28fe9 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -268,12 +268,15 @@ def delete(self, device_id): session.commit() except IntegrityError as e: session.rollback() - return empty_result( - status="error", data="Could not remove device because existing references: {}".format(e) + return ( + empty_result( + status="error", data="Could not remove device because existing references: {}".format(e) + ), + 500, ) except Exception as e: session.rollback() - return empty_result(status="error", data="Could not remove device: {}".format(e)) + return empty_result(status="error", data="Could not remove device: {}".format(e)), 500 return empty_result(status="success", data={"deleted_device": dev.as_dict()}), 200 @jwt_required From 
5a606aec6e31bcd17a5c5c16aed6253ef7c65433 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 23 Aug 2023 09:22:33 +0200 Subject: [PATCH 140/169] remove sync events when deleting a device --- src/cnaas_nms/api/device.py | 1 + src/cnaas_nms/devicehandler/erase.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index f4f28fe9..0be9cf92 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -258,6 +258,7 @@ def delete(self, device_id): if not dev: return empty_result("error", "Device not found"), 404 try: + remove_sync_events(dev.hostname) for nei in dev.get_neighbors(session): nei.synchronized = False add_sync_event(nei.hostname, "neighbor_deleted", get_jwt_identity()) diff --git a/src/cnaas_nms/devicehandler/erase.py b/src/cnaas_nms/devicehandler/erase.py index 023bff8c..30de381a 100644 --- a/src/cnaas_nms/devicehandler/erase.py +++ b/src/cnaas_nms/devicehandler/erase.py @@ -8,7 +8,7 @@ from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.session import sqla_session from cnaas_nms.devicehandler.nornir_helper import NornirJobResult -from cnaas_nms.devicehandler.sync_history import add_sync_event +from cnaas_nms.devicehandler.sync_history import add_sync_event, remove_sync_events from cnaas_nms.scheduler.wrapper import job_wrapper from cnaas_nms.tools.log import get_logger @@ -90,6 +90,7 @@ def device_erase(device_id: int = None, job_id: int = None, scheduled_by: Option if failed_hosts == []: with sqla_session() as session: dev: Device = session.query(Device).filter(Device.id == device_id).one_or_none() + remove_sync_events(dev.hostname) try: for nei in dev.get_neighbors(session): nei.synchronized = False From 02e4dc25f43b70b50334ad7d2c824d4207ac9990 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 23 Aug 2023 09:27:34 +0200 Subject: [PATCH 141/169] Try fix for "Too many packets in payload" from engineio 
https://github.com/miguelgrinberg/python-engineio/issues/142#issuecomment-545822543 --- src/cnaas_nms/api/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cnaas_nms/api/app.py b/src/cnaas_nms/api/app.py index a69d0421..799cd61d 100644 --- a/src/cnaas_nms/api/app.py +++ b/src/cnaas_nms/api/app.py @@ -3,6 +3,7 @@ import sys from typing import Optional +from engineio.payload import Payload from flask import Flask, jsonify, request from flask_cors import CORS from flask_jwt_extended import JWTManager, decode_token @@ -84,6 +85,7 @@ def handle_error(self, e): resources={r"/api/*": {"origins": "*"}}, expose_headers=["Content-Type", "Authorization", "X-Total-Count", "Link"], ) +Payload.max_decode_packets = 500 socketio = SocketIO(app, cors_allowed_origins="*") if api_settings.JWT_ENABLED: From 92e980b8e4ab4bc85fbb2eaf897410a802f33114 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 23 Aug 2023 09:42:47 +0200 Subject: [PATCH 142/169] Add device_update websocket events for action CREATED and DELETED --- src/cnaas_nms/db/device.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 86e3ad09..b0771189 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -490,3 +490,17 @@ def after_update_device(mapper, connection, target: Device): update_data = {"action": "UPDATED", "device_id": target.id, "hostname": target.hostname, "object": target.as_dict()} json_data = json.dumps(update_data) add_event(json_data=json_data, event_type="update", update_type="device") + + +@event.listens_for(Device, "before_delete") +def before_delete_device(mapper, connection, target: Device): + update_data = {"action": "DELETED", "device_id": target.id, "hostname": target.hostname, "object": target.as_dict()} + json_data = json.dumps(update_data) + add_event(json_data=json_data, event_type="update", update_type="device") + + +@event.listens_for(Device, "after_insert") +def 
after_insert_device(mapper, connection, target: Device): + update_data = {"action": "CREATED", "device_id": target.id, "hostname": target.hostname, "object": target.as_dict()} + json_data = json.dumps(update_data) + add_event(json_data=json_data, event_type="update", update_type="device") From e079bc10205bad9ad032b2c3d711126642c4093d Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Tue, 29 Aug 2023 16:35:40 +0200 Subject: [PATCH 143/169] change to maintained fork --- .github/workflows/run-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 6d795f92..81517ded 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -32,7 +32,7 @@ jobs: # - name: Create code coverage dir # run: mkdir -p docker/coverage/ # - - uses: satackey/action-docker-layer-caching@v0.0.11 + - uses: jpribyl/action-docker-layer-caching@v0.1.1 # Ignore the failure of a step and avoid terminating the job. continue-on-error: true From a926a90c5c43b3301e87a6e74876116567691c03 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 31 Aug 2023 16:09:02 +0200 Subject: [PATCH 144/169] Add API to query full running config from a device, or running config for a specific interface --- docs/apiref/devices.rst | 26 ++++++++++++++++ src/cnaas_nms/api/device.py | 30 +++++++++++++++++++ src/cnaas_nms/api/tests/test_device.py | 12 ++++++++ src/cnaas_nms/devicehandler/get.py | 27 +++++++++++++---- src/cnaas_nms/devicehandler/tests/test_get.py | 6 ++++ 5 files changed, 95 insertions(+), 6 deletions(-) diff --git a/docs/apiref/devices.rst b/docs/apiref/devices.rst index 4a016c2c..bbab20ae 100644 --- a/docs/apiref/devices.rst +++ b/docs/apiref/devices.rst @@ -190,6 +190,32 @@ This will return both the generated configuration based on the template for this device type, and also a list of available vaiables that could be used in the template. 
+Running config +-------------- + +To get the latest running config from the device you can use running_config: + +:: + + curl https://hostname/api/v1.0/device//running_config + +You can also specify to only get the running config for a specific interface: + +:: + + curl https://hostname/api/v1.0/device//running_config?interface=Ethernet1 + +Example output: + +:: + + { + "status": "success", + "data": { + "config": "no switchport\nvrf MGMT\nip address 10.100.2.101/24\nno lldp transmit\nno lldp receive" + } + } + View previous config -------------------- diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index 0be9cf92..bbfd2944 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -874,6 +874,35 @@ def get(self, hostname: str): return result +class DeviceRunningConfigApi(Resource): + @jwt_required + @device_api.param("interface") + def get(self, hostname: str): + args = request.args + result = empty_result() + result["data"] = {"config": None} + if not Device.valid_hostname(hostname): + return empty_result(status="error", data="Invalid hostname specified"), 400 + + with sqla_session() as session: + dev: Device = session.query(Device).filter(Device.hostname == hostname).one_or_none() + if not dev: + return empty_result("error", "Device not found"), 404 + + try: + if "interface" in args: + running_config = cnaas_nms.devicehandler.get.get_running_config_interface( + session, hostname, args["interface"] + ) + else: + running_config = cnaas_nms.devicehandler.get.get_running_config(hostname) + except Exception as e: + return empty_result("error", "Exception: {}".format(str(e))), 500 + + result["data"]["config"] = running_config + return result + + class DevicePreviousConfigApi(Resource): @jwt_required @device_api.param("job_id") @@ -1166,6 +1195,7 @@ def post(self): device_api.add_resource(DeviceByIdApi, "/") device_api.add_resource(DeviceByHostnameApi, "/") device_api.add_resource(DeviceConfigApi, "//generate_config") 
+device_api.add_resource(DeviceRunningConfigApi, "//running_config") device_api.add_resource(DevicePreviousConfigApi, "//previous_config") device_api.add_resource(DeviceApplyConfigApi, "//apply_config") device_api.add_resource(DeviceApi, "") diff --git a/src/cnaas_nms/api/tests/test_device.py b/src/cnaas_nms/api/tests/test_device.py index a54c26e0..1e307378 100644 --- a/src/cnaas_nms/api/tests/test_device.py +++ b/src/cnaas_nms/api/tests/test_device.py @@ -263,6 +263,18 @@ def test_get_synchistory(self): self.assertEqual(result.status_code, 200, "Get synchistory for all devices failed") self.assertTrue("data" in result.json) + @pytest.mark.equipment + def test_get_running_config(self): + hostname = self.testdata["managed_dist"] + result = self.client.get(f"/api/v1.0/device/{hostname}/running_config") + self.assertEqual(result.status_code, 200, "Get running config failed") + + @pytest.mark.equipment + def test_get_running_config_interface(self): + hostname = self.testdata["managed_dist"] + result = self.client.get(f"/api/v1.0/device/{hostname}/running_config", query_string={"interface": "Ethernet1"}) + self.assertEqual(result.status_code, 200, "Get running config interface failed") + if __name__ == "__main__": unittest.main() diff --git a/src/cnaas_nms/devicehandler/get.py b/src/cnaas_nms/devicehandler/get.py index 92126636..040d38d1 100644 --- a/src/cnaas_nms/devicehandler/get.py +++ b/src/cnaas_nms/devicehandler/get.py @@ -2,6 +2,8 @@ import re from typing import Dict, List, Optional +from netutils.config import compliance +from netutils.lib_mapper import NAPALM_LIB_MAPPER from nornir.core.filter import F from nornir.core.task import AggregatedResult from nornir_napalm.plugins.tasks import napalm_get @@ -11,6 +13,7 @@ from cnaas_nms.db.device import Device, DeviceType from cnaas_nms.db.device_vars import expand_interface_settings from cnaas_nms.db.interface import Interface, InterfaceConfigType, InterfaceError +from cnaas_nms.db.session import sqla_session from 
cnaas_nms.tools.log import get_logger @@ -19,14 +22,26 @@ def get_inventory(): return nr.dict()["inventory"] -def get_running_config(hostname): +def get_running_config(hostname: str) -> Optional[str]: nr = cnaas_nms.devicehandler.nornir_helper.cnaas_init() - if hostname: - nr_filtered = nr.filter(name=hostname).filter(managed=True) - else: - nr_filtered = nr.filter(managed=True) + nr_filtered = nr.filter(name=hostname).filter(managed=True) nr_result = nr_filtered.run(task=napalm_get, getters=["config"]) - return nr_result[hostname].result + if nr_result[hostname].failed: + raise nr_result[hostname][0].exception + else: + return nr_result[hostname].result["config"]["running"] + + +def get_running_config_interface(session: sqla_session, hostname: str, interface: str) -> str: + running_config = get_running_config(hostname) + dev: Device = session.query(Device).filter(Device.hostname == hostname).one() + os_parser = compliance.parser_map[NAPALM_LIB_MAPPER.get(dev.platform)] + config_parsed = os_parser(running_config) + ret = [] + for line in config_parsed.config_lines: + if f"interface {interface}" in line.parents: + ret.append(line.config_line.strip()) + return "\n".join(ret) def calc_config_hash(hostname, config): diff --git a/src/cnaas_nms/devicehandler/tests/test_get.py b/src/cnaas_nms/devicehandler/tests/test_get.py index a2a2cd96..9b7d103e 100644 --- a/src/cnaas_nms/devicehandler/tests/test_get.py +++ b/src/cnaas_nms/devicehandler/tests/test_get.py @@ -89,6 +89,12 @@ def test_update_links(self): ) pprint.pprint(new_links) + @pytest.mark.equipment + def test_get_running_config_interface(self): + with sqla_session() as session: + if_config: str = cnaas_nms.devicehandler.get.get_running_config_interface(session, "eosdist1", "Ethernet1") + assert if_config.strip(), "no config found" + if __name__ == "__main__": unittest.main() From f584c2f9b3afa0b3e15c501bbfaa37cece0b7c40 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 18:44:19 +0000 Subject: [PATCH 145/169] Bump gitpython from 3.1.32 to 3.1.34 Bumps [gitpython](https://github.com/gitpython-developers/GitPython) from 3.1.32 to 3.1.34. - [Release notes](https://github.com/gitpython-developers/GitPython/releases) - [Changelog](https://github.com/gitpython-developers/GitPython/blob/main/CHANGES) - [Commits](https://github.com/gitpython-developers/GitPython/compare/3.1.32...3.1.34) --- updated-dependencies: - dependency-name: gitpython dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9e21f63f..65c54da8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ flask-restx==1.0.5 flask==2.2.5 Flask-SocketIO==5.3.1 gevent==21.12.0 -GitPython==3.1.32 +GitPython==3.1.34 mypy==0.971 mypy-extensions==0.4.3 markupsafe==2.1.1 From ffe826b1ea0b13ed67299fc55fd34ce873c1c87d Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 09:17:08 +0200 Subject: [PATCH 146/169] If commit confirmed mode 2 is used and all devices succeed with push, but one or more devices are unreachable after the commit call succeeded, devices that are reachable after will still get confirmed even though they should get rolled back. Add extra ping check to fix this. 
--- src/cnaas_nms/devicehandler/sync_devices.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 8c4819b8..31f0b71e 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -11,7 +11,7 @@ from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file -from nornir_napalm.plugins.tasks import napalm_configure, napalm_get +from nornir_napalm.plugins.tasks import napalm_configure, napalm_get, napalm_ping from nornir_utils.plugins.functions import print_result import cnaas_nms.db.helper @@ -560,6 +560,9 @@ def push_sync_device( task.run(**task_args) if confirm_mode != 2: task.host.close_connection("napalm") + if confirm_mode == 2: + time.sleep(1) + task.run(task=napalm_ping, name="Verify reachability") if task.results[1].diff: config = task.results[1].host["config"] From 66f245adfea72e59cf7e1b96fecc908992757dcc Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 09:32:27 +0200 Subject: [PATCH 147/169] use get facts since ping is about pinging another external device --- src/cnaas_nms/devicehandler/sync_devices.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 31f0b71e..0da17889 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -11,7 +11,7 @@ from nornir.core import Nornir from nornir.core.task import MultiResult, Result from nornir_jinja2.plugins.tasks import template_file -from nornir_napalm.plugins.tasks import napalm_configure, napalm_get, napalm_ping +from nornir_napalm.plugins.tasks import napalm_configure, napalm_get from nornir_utils.plugins.functions import print_result import cnaas_nms.db.helper @@ -562,7 +562,7 @@ def 
push_sync_device( task.host.close_connection("napalm") if confirm_mode == 2: time.sleep(1) - task.run(task=napalm_ping, name="Verify reachability") + task.run(task=napalm_get, getters=["facts"], name="Verify reachability") if task.results[1].diff: config = task.results[1].host["config"] From d2a56e9dc994f018fc240bf7ee5e9f4101b93833 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 10:05:34 +0200 Subject: [PATCH 148/169] Log error device not reachable after commit --- src/cnaas_nms/devicehandler/sync_devices.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0da17889..291c0024 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -560,9 +560,22 @@ def push_sync_device( task.run(**task_args) if confirm_mode != 2: task.host.close_connection("napalm") - if confirm_mode == 2: + if confirm_mode == 2 and not dry_run: time.sleep(1) task.run(task=napalm_get, getters=["facts"], name="Verify reachability") + if task.results[2].failed: + logger.error( + "Could not reach device {} after commit, rollback in: {}s".format( + task.host.name, api_settings.COMMIT_CONFIRMED_TIMEOUT + ) + ) + else: + short_facts = {"fqdn": "unknown"} + try: + short_facts["fqdn"] = task.results[2].result["facts"]["fqdn"] + task.results[2].result["facts"] = short_facts + except Exception: + pass if task.results[1].diff: config = task.results[1].host["config"] From 1c6c73695a22eb8ab06b254b12e1934f44fce45f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 10:15:50 +0200 Subject: [PATCH 149/169] task.run fails with exception, so catch that instead of checking fail status --- src/cnaas_nms/devicehandler/sync_devices.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 
291c0024..5a8d235d 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -562,13 +562,15 @@ def push_sync_device( task.host.close_connection("napalm") if confirm_mode == 2 and not dry_run: time.sleep(1) - task.run(task=napalm_get, getters=["facts"], name="Verify reachability") - if task.results[2].failed: + try: + task.run(task=napalm_get, getters=["facts"], name="Verify reachability") + except Exception as e: logger.error( "Could not reach device {} after commit, rollback in: {}s".format( task.host.name, api_settings.COMMIT_CONFIRMED_TIMEOUT ) ) + raise e else: short_facts = {"fqdn": "unknown"} try: From 97367a9dd9e149c5220d02dd26dccf5815410b2f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 10:22:51 +0200 Subject: [PATCH 150/169] Configurable time to wait between comitting configuration and checking that the device is still reachable --- docker/api/config/api.yml | 1 + docs/configuration/index.rst | 2 ++ src/cnaas_nms/app_settings.py | 2 ++ src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/api/config/api.yml b/docker/api/config/api.yml index 32a87fcd..9f52fcf6 100644 --- a/docker/api/config/api.yml +++ b/docker/api/config/api.yml @@ -12,3 +12,4 @@ mgmtdomain_reserved_count: 5 mgmtdomain_primary_ip_version: 4 commit_confirmed_mode: 1 commit_confirmed_timeout: 300 +commit_confirmed_wait: 1 diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index 6221622b..4ca4dcc4 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -45,6 +45,8 @@ Defines parameters for the API: (see :ref:`commit_confirm_modes`). Defaults to 1. - commit_confirmed_timeout: Time to wait before rolling back an unconfirmed commit, specified in seconds. Defaults to 300. +- commit_confirmed_wait: Time to wait between comitting configuration and checking + that the device is still reachable, specified in seconds. 
Defaults to 1. /etc/cnaas-nms/repository.yml ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index 4d1622b2..cbc77621 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -52,6 +52,7 @@ class ApiSettings(BaseSettings): MGMTDOMAIN_PRIMARY_IP_VERSION: int = 4 COMMIT_CONFIRMED_MODE: int = 1 COMMIT_CONFIRMED_TIMEOUT: int = 300 + COMMIT_CONFIRMED_WAIT: int = 1 SETTINGS_OVERRIDE: Optional[dict] = None @validator("MGMTDOMAIN_PRIMARY_IP_VERSION") @@ -90,6 +91,7 @@ def construct_api_settings() -> ApiSettings: MGMTDOMAIN_PRIMARY_IP_VERSION=config.get("mgmtdomain_primary_ip_version", 4), COMMIT_CONFIRMED_MODE=config.get("commit_confirmed_mode", 1), COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), + COMMIT_CONFIRMED_WAIT=config.get("commit_confirmed_wait", 1), SETTINGS_OVERRIDE=config.get("settings_override", None), ) else: diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 5a8d235d..76546d33 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -561,7 +561,7 @@ def push_sync_device( if confirm_mode != 2: task.host.close_connection("napalm") if confirm_mode == 2 and not dry_run: - time.sleep(1) + time.sleep(api_settings.COMMIT_CONFIRMED_WAIT) try: task.run(task=napalm_get, getters=["facts"], name="Verify reachability") except Exception as e: From 897cb2f9bfb5add4e83e0ea1676482be6d13b92d Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 16:04:22 +0200 Subject: [PATCH 151/169] Make sure commit confirm 2 checks for failed hosts before hosts with empty diff, otherwise wrong message is displayed and job lock is released too early. 
Add sync event immediately when device is unreachable after commit --- src/cnaas_nms/devicehandler/sync_devices.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 76546d33..1a4cd544 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -565,6 +565,7 @@ def push_sync_device( try: task.run(task=napalm_get, getters=["facts"], name="Verify reachability") except Exception as e: + add_sync_event(task.host.name, "commit_confirm_failed", scheduled_by, job_id) logger.error( "Could not reach device {} after commit, rollback in: {}s".format( task.host.name, api_settings.COMMIT_CONFIRMED_TIMEOUT @@ -976,11 +977,7 @@ def sync_devices( f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) elif get_confirm_mode(confirm_mode_override) == 2 and not dry_run: - if not changed_hosts: - logger.info("None of the selected host has any changes (diff), skipping commit-confirm") - logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) - Joblock.release_lock(session, job_id=job_id) - elif len(failed_hosts) > 0: + if len(failed_hosts) > 0: logger.error( "No confirm job scheduled since one or more devices failed in commitmode 2" ", all devices will rollback in {}s".format(api_settings.COMMIT_CONFIRMED_TIMEOUT) @@ -988,6 +985,10 @@ def sync_devices( time.sleep(api_settings.COMMIT_CONFIRMED_TIMEOUT) logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) + elif not changed_hosts: + logger.info("None of the selected host has any changes (diff), skipping commit-confirm") + logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) + Joblock.release_lock(session, job_id=job_id) else: scheduler = Scheduler() next_job_id = scheduler.add_onetime_job( From 1c6f069e9b6ed4e922472e08976420bbfa2b4a61 
Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 8 Sep 2023 16:17:28 +0200 Subject: [PATCH 152/169] Fix for missing log events, before events added between two xread calls would not have been read? --- src/cnaas_nms/run.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/run.py b/src/cnaas_nms/run.py index a83e4ede..55d8dbde 100644 --- a/src/cnaas_nms/run.py +++ b/src/cnaas_nms/run.py @@ -93,7 +93,7 @@ def loglevel_to_rooms(levelname: str) -> List[str]: def parse_redis_event(event): try: - # [stream, [(messageid, {datadict})] + # [stream, [(messageid, {datadict})]] if event[0] == "events": return event[1][0][1] except Exception: # noqa: S110 @@ -115,13 +115,19 @@ def emit_redis_event(event): def thread_websocket_events(): redis: StrictRedis with redis_session() as redis: + last_event = b"$" while True: - result = redis.xread({"events": b"$"}, count=10, block=200) + result = redis.xread({"events": last_event}, count=10, block=200) for item in result: event = parse_redis_event(item) if not event: continue emit_redis_event(event) + try: + # [stream, [(messageid, {datadict})]] + last_event = item[1][0][0] + except Exception: # noqa: S110 + last_event = b"$" if stop_websocket_threads: break From 39c98ad8722aa61308be77c05604b2d949243339 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 11 Sep 2023 16:45:10 +0200 Subject: [PATCH 153/169] workaround for scheduled_by arg not passed to job wrapped functions --- src/cnaas_nms/scheduler/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cnaas_nms/scheduler/scheduler.py b/src/cnaas_nms/scheduler/scheduler.py index fc9300f5..a671b242 100644 --- a/src/cnaas_nms/scheduler/scheduler.py +++ b/src/cnaas_nms/scheduler/scheduler.py @@ -212,6 +212,7 @@ def add_onetime_job( kwargs["job_id"] = job_id kwargs["scheduled_by"] = scheduled_by + kwargs["kwargs"]["scheduled_by"] = scheduled_by if self.use_mule: try: import uwsgi From 
769276b9baa517b858cb3c34c555bb0bba84bea1 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 11 Sep 2023 16:45:52 +0200 Subject: [PATCH 154/169] better error handling of napalm_configure fails --- src/cnaas_nms/devicehandler/sync_devices.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 1a4cd544..74b917c2 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -557,9 +557,14 @@ def push_sync_device( task_args["job_id"] = job_id task_args["confirm_mode_override"] = confirm_mode logger.debug("Commit confirm mode for host {}: {} (dry_run: {})".format(task.host.name, confirm_mode, dry_run)) - task.run(**task_args) - if confirm_mode != 2: - task.host.close_connection("napalm") + try: + task.run(**task_args) + except Exception as e: + logger.exception("Exception while running task napalm_configure for device {}".format(task.host.name)) + raise e + finally: + if confirm_mode != 2: + task.host.close_connection("napalm") if confirm_mode == 2 and not dry_run: time.sleep(api_settings.COMMIT_CONFIRMED_WAIT) try: @@ -874,6 +879,7 @@ def sync_devices( task=push_sync_device, dry_run=dry_run, job_id=job_id, + scheduled_by=scheduled_by, confirm_mode=get_confirm_mode(confirm_mode_override), ) except Exception as e: @@ -900,7 +906,7 @@ def sync_devices( unchanged_hosts = [] # calculate change impact score for host, results in nrresult.items(): - if host in failed_hosts or len(results) != 3: + if host in failed_hosts or len(results) < 3: logger.debug("Unable to calculate change score for failed device {}".format(host)) elif results[2].diff: changed_hosts.append(host) From 6343563bcabe0ff1deb13ba0486e8ecb967b0d37 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 11 Sep 2023 17:09:47 +0200 Subject: [PATCH 155/169] wait to release device lock if one or more devices failed configuration with 
commit confirmed mode 1 --- src/cnaas_nms/devicehandler/sync_devices.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 74b917c2..3a1dbf5a 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -949,6 +949,12 @@ def sync_devices( dev.last_seen = datetime.datetime.utcnow() if not dry_run and get_confirm_mode(confirm_mode_override) != 2: logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) + if failed_hosts: + logger.error( + "One or more devices failed to commit configuration, they will roll back configuration" + " in {}s: {}".format(api_settings.COMMIT_CONFIRMED_TIMEOUT, ", ".join(failed_hosts)) + ) + time.sleep(api_settings.COMMIT_CONFIRMED_TIMEOUT) Joblock.release_lock(session, job_id=job_id) if len(device_list) == 0: @@ -983,7 +989,7 @@ def sync_devices( f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}" ) elif get_confirm_mode(confirm_mode_override) == 2 and not dry_run: - if len(failed_hosts) > 0: + if failed_hosts: logger.error( "No confirm job scheduled since one or more devices failed in commitmode 2" ", all devices will rollback in {}s".format(api_settings.COMMIT_CONFIRMED_TIMEOUT) From 33cacd7dee3ff59a9f6d9fc3ed4712d0fc059c88 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 11 Sep 2023 17:20:57 +0200 Subject: [PATCH 156/169] Fix the order of printing of log message to release job lock --- src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 3a1dbf5a..58050d35 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -948,13 +948,13 @@ def sync_devices( remove_sync_events(hostname) dev.last_seen = datetime.datetime.utcnow() 
if not dry_run and get_confirm_mode(confirm_mode_override) != 2: - logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) if failed_hosts: logger.error( "One or more devices failed to commit configuration, they will roll back configuration" " in {}s: {}".format(api_settings.COMMIT_CONFIRMED_TIMEOUT, ", ".join(failed_hosts)) ) time.sleep(api_settings.COMMIT_CONFIRMED_TIMEOUT) + logger.info("Releasing lock for devices from syncto job: {}".format(job_id)) Joblock.release_lock(session, job_id=job_id) if len(device_list) == 0: From 652cfca25d24c39fe09fba8fdf640e4509844e05 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 18 Sep 2023 11:22:00 +0200 Subject: [PATCH 157/169] Support junos on get_running_config_interface, and remove first whitespace level --- src/cnaas_nms/devicehandler/get.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/cnaas_nms/devicehandler/get.py b/src/cnaas_nms/devicehandler/get.py index 040d38d1..12b2ed7f 100644 --- a/src/cnaas_nms/devicehandler/get.py +++ b/src/cnaas_nms/devicehandler/get.py @@ -38,9 +38,18 @@ def get_running_config_interface(session: sqla_session, hostname: str, interface os_parser = compliance.parser_map[NAPALM_LIB_MAPPER.get(dev.platform)] config_parsed = os_parser(running_config) ret = [] + leading_whitespace: Optional[int] = None for line in config_parsed.config_lines: - if f"interface {interface}" in line.parents: - ret.append(line.config_line.strip()) + find_pattern = f"interface {interface}" + if dev.platform == "junos": + find_pattern = f" {interface} {{" + if find_pattern in line.parents: + try: + if not leading_whitespace: + leading_whitespace = len(line.config_line) - len(line.config_line.lstrip(" ")) + ret.append(line.config_line[leading_whitespace:]) + except Exception: + ret.append(line.config_line.strip()) return "\n".join(ret) From f4aa4bce5b71e23c28547bfcb93d5260f4da86fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Sep 2023 10:47:02 +0000 Subject: [PATCH 158/169] Bump gitpython from 3.1.34 to 3.1.35 Bumps [gitpython](https://github.com/gitpython-developers/GitPython) from 3.1.34 to 3.1.35. - [Release notes](https://github.com/gitpython-developers/GitPython/releases) - [Changelog](https://github.com/gitpython-developers/GitPython/blob/main/CHANGES) - [Commits](https://github.com/gitpython-developers/GitPython/compare/3.1.34...3.1.35) --- updated-dependencies: - dependency-name: gitpython dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 65c54da8..335cecf5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ flask-restx==1.0.5 flask==2.2.5 Flask-SocketIO==5.3.1 gevent==21.12.0 -GitPython==3.1.34 +GitPython==3.1.35 mypy==0.971 mypy-extensions==0.4.3 markupsafe==2.1.1 From b59c59aa3f30237e8cf2180ac61f308a4be2e23f Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 18 Sep 2023 12:49:16 +0200 Subject: [PATCH 159/169] Add tags support for dist interfaces --- docs/reporef/index.rst | 1 + src/cnaas_nms/db/settings_fields.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/reporef/index.rst b/docs/reporef/index.rst index 3ad647be..b295d1d0 100644 --- a/docs/reporef/index.rst +++ b/docs/reporef/index.rst @@ -403,6 +403,7 @@ Keys for interfaces.yml or interfaces_.yml: * enabled: Optional. Set the administrative state of the interface. Defaults to true if not set. * aggregate_id: Optional. Identifier for configuring LACP etc. Integer value. Special value -1 means configure MLAG and use ID based on indexnum. + * tags: Optional list of strings, custom user defined tags to apply. * cli_append_str: Optional. Custom configuration to append to this interface. 
The "downlink" ifclass is used on DIST devices to specify that this interface diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index 51cb0eab..aafa9a8f 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -138,6 +138,7 @@ class f_interface(BaseModel): untagged_vlan: Optional[int] = vlan_id_schema_optional tagged_vlan_list: Optional[List[int]] = None aggregate_id: Optional[int] = None + tags: Optional[List[str]] = None cli_append_str: str = "" @validator("tagged_vlan_list", each_item=True) From 0964fcbaa9f91ae7e4f0dd8941aa6a335f9d7a44 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 18 Sep 2023 12:50:43 +0200 Subject: [PATCH 160/169] Fix unattached syncevents after init of new device --- src/cnaas_nms/devicehandler/init_device.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/init_device.py b/src/cnaas_nms/devicehandler/init_device.py index fad5ede5..4f56e46f 100644 --- a/src/cnaas_nms/devicehandler/init_device.py +++ b/src/cnaas_nms/devicehandler/init_device.py @@ -27,7 +27,7 @@ from cnaas_nms.devicehandler.cert import arista_copy_cert from cnaas_nms.devicehandler.nornir_helper import NornirJobResult, get_jinja_env from cnaas_nms.devicehandler.sync_devices import confcheck_devices, populate_device_vars -from cnaas_nms.devicehandler.sync_history import add_sync_event +from cnaas_nms.devicehandler.sync_history import add_sync_event, remove_sync_events from cnaas_nms.devicehandler.update import set_facts, update_interfacedb_worker, update_linknets from cnaas_nms.plugins.pluginmanager import PluginManagerHandler from cnaas_nms.scheduler.scheduler import Scheduler @@ -548,6 +548,7 @@ def init_access_device_step1( dev.hostname = new_hostname session.commit() hostname = dev.hostname + remove_sync_events(old_hostname) # Rebuild settings caches to make sure group memberships are updated after # setting new hostname From 
550e368ee764edfe1b3c19bf8f70e1886974eff7 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 18 Sep 2023 16:29:59 +0200 Subject: [PATCH 161/169] Option to filter output from generate only api call --- src/cnaas_nms/api/device.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index bbfd2944..ac99cd9b 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -846,8 +846,12 @@ def post(self): class DeviceConfigApi(Resource): @jwt_required + @device_api.param("variables_only", "Only return available variables") + @device_api.param("interface_variables_only", "Only return available interface variables") + @device_api.param("config_only", "Only return full generated config") def get(self, hostname: str): """Get device configuration""" + args = request.args result = empty_result() result["data"] = {"config": None} if not Device.valid_hostname(hostname): @@ -861,6 +865,15 @@ def get(self, hostname: str): "generated_config": config, "available_variables": template_vars, } + if "variables_only" in args and args["variables_only"]: + del result["data"]["config"]["generated_config"] + elif "interface_variables_only" in args and args["interface_variables_only"]: + del result["data"]["config"]["generated_config"] + interface_variables = result["data"]["available_variables"]["interfaces"] + result["data"]["available_variables"] = {"interfaces": interface_variables} + elif "config_only" in args and args["config_only"]: + del result["data"]["available_variables"] + except Exception as e: logger.exception(f"Exception while generating config for device {hostname}") return ( From b0fd8b2d1ff6852d93e3b87c7d3a4c958e396a0e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 18 Sep 2023 16:32:38 +0200 Subject: [PATCH 162/169] Option to filter output from generate only api call --- src/cnaas_nms/api/device.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index ac99cd9b..d3a0df8a 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -869,10 +869,10 @@ def get(self, hostname: str): del result["data"]["config"]["generated_config"] elif "interface_variables_only" in args and args["interface_variables_only"]: del result["data"]["config"]["generated_config"] - interface_variables = result["data"]["available_variables"]["interfaces"] - result["data"]["available_variables"] = {"interfaces": interface_variables} + interface_variables = result["data"]["config"]["available_variables"]["interfaces"] + result["data"]["config"]["available_variables"] = {"interfaces": interface_variables} elif "config_only" in args and args["config_only"]: - del result["data"]["available_variables"] + del result["data"]["config"]["available_variables"] except Exception as e: logger.exception(f"Exception while generating config for device {hostname}") From fe3be7e3e90b033cedc4ef23b529f8acbd20370e Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 20 Sep 2023 09:54:26 +0200 Subject: [PATCH 163/169] Use field masks from flask_restx marshal to do "partial object fetching" using X-Fields header instead of custom query params, hopefully a bit more standard and more flexible Example use curl -H "X-Fields: available_variables{interfaces},hostname" to only retreive hostname and variables regarding interfaces --- src/cnaas_nms/api/device.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/cnaas_nms/api/device.py b/src/cnaas_nms/api/device.py index d3a0df8a..7de7d721 100644 --- a/src/cnaas_nms/api/device.py +++ b/src/cnaas_nms/api/device.py @@ -3,7 +3,7 @@ from typing import List, Optional from flask import make_response, request -from flask_restx import Namespace, Resource, fields +from flask_restx import Namespace, Resource, fields, marshal from pydantic import ValidationError from sqlalchemy import func from 
sqlalchemy.exc import IntegrityError @@ -171,6 +171,15 @@ }, ) +device_generate_config_model = device_api.model( + "generate_config", + { + "hostname": fields.String, + "generated_config": fields.String, + "available_variables": fields.Raw, + }, +) + stackmember_model = device_api.model( "stackmember", { @@ -844,14 +853,11 @@ def post(self): return resp -class DeviceConfigApi(Resource): +class DeviceGenerateConfigApi(Resource): @jwt_required - @device_api.param("variables_only", "Only return available variables") - @device_api.param("interface_variables_only", "Only return available interface variables") - @device_api.param("config_only", "Only return full generated config") + @device_api.doc(model=device_generate_config_model) def get(self, hostname: str): """Get device configuration""" - args = request.args result = empty_result() result["data"] = {"config": None} if not Device.valid_hostname(hostname): @@ -860,19 +866,13 @@ def get(self, hostname: str): try: config, template_vars = cnaas_nms.devicehandler.sync_devices.generate_only(hostname) template_vars["host"] = hostname - result["data"]["config"] = { + data = { "hostname": hostname, "generated_config": config, "available_variables": template_vars, } - if "variables_only" in args and args["variables_only"]: - del result["data"]["config"]["generated_config"] - elif "interface_variables_only" in args and args["interface_variables_only"]: - del result["data"]["config"]["generated_config"] - interface_variables = result["data"]["config"]["available_variables"]["interfaces"] - result["data"]["config"]["available_variables"] = {"interfaces": interface_variables} - elif "config_only" in args and args["config_only"]: - del result["data"]["config"]["available_variables"] + + result["data"]["config"] = marshal(data, device_generate_config_model, mask=request.headers.get("X-Fields")) except Exception as e: logger.exception(f"Exception while generating config for device {hostname}") @@ -1207,7 +1207,7 @@ def post(self): 
# Devices device_api.add_resource(DeviceByIdApi, "/") device_api.add_resource(DeviceByHostnameApi, "/") -device_api.add_resource(DeviceConfigApi, "//generate_config") +device_api.add_resource(DeviceGenerateConfigApi, "//generate_config") device_api.add_resource(DeviceRunningConfigApi, "//running_config") device_api.add_resource(DevicePreviousConfigApi, "//previous_config") device_api.add_resource(DeviceApplyConfigApi, "//apply_config") From a03d808da4a1d1bc107c312228e5768169720365 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 20 Sep 2023 12:50:51 +0200 Subject: [PATCH 164/169] Changelog v1.5 --- docs/changelog/index.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index ca7941d1..7eb38f41 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -1,6 +1,36 @@ Changelog ========= +Version 1.5.0 +------------- + +New features: + + - Add commit confirm modes: mode 0 - no commit confirm (previous behavior), mode 1 - confirm each device individually + after it has committed, mode 2 - confirm devices only when all devices in the job have committed successfully + - Add API to query configured API settings + - New settings for: + * users: username, ssh_key, password_hash etc to handle device user accounts + * dot1x_multi_host, poe_reboot_maintain + * prefix_sets and routing_policies to describe policies for router peerings etc + - Sync history events, save what event caused devices to become unsynchronized + - API to query running config + +Bug fixes: + + - Don't return duplicates of neighbor entries + - Fix error "Error in repository: HEAD is a detached" when refreshing settings repo + - Mark init neighbors as unsync if they have local changes + - Log events were not always sent from redis to websocket + +Version 1.4.2 +------------- + +Bug fixes: + + - Fix ZTP of access switch connected to another access switch with type downlink but empty data + + Version 1.4.1 
------------- From 6c9626b734a809c6643f6742e625ae44c6278bd7 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Wed, 20 Sep 2023 12:51:19 +0200 Subject: [PATCH 165/169] version 1.5b1 --- src/cnaas_nms/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/version.py b/src/cnaas_nms/version.py index 3a8d0fef..4ee2c0b0 100644 --- a/src/cnaas_nms/version.py +++ b/src/cnaas_nms/version.py @@ -1,3 +1,3 @@ -__version__ = "1.5.0a1" +__version__ = "1.5.0b1" __version_info__ = tuple([field for field in __version__.split(".")]) __api_version__ = "v1.0" From a7ff2bcfa44f09bf902ff29dcfce5a3c5760169b Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 21 Sep 2023 10:20:08 +0200 Subject: [PATCH 166/169] Some newer 4.30 image with compression stuff takes even longer to activate, change timeout 60s to 120s --- src/cnaas_nms/devicehandler/firmware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/firmware.py b/src/cnaas_nms/devicehandler/firmware.py index 34061d60..9a3c68ae 100644 --- a/src/cnaas_nms/devicehandler/firmware.py +++ b/src/cnaas_nms/devicehandler/firmware.py @@ -194,7 +194,7 @@ def arista_firmware_activate(task, filename: str, job_id: Optional[int] = None) res = task.run(netmiko_send_command, command_string="conf t", expect_string=".*config.*#") - res = task.run(netmiko_send_command, command_string=boot_file_cmd, read_timeout=60) + res = task.run(netmiko_send_command, command_string=boot_file_cmd, read_timeout=120) res = task.run(netmiko_send_command, command_string="end", expect_string=".*#") From 2df4cf52e691d8c27b010cd242716f528f2e98a9 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Thu, 21 Sep 2023 16:15:15 +0200 Subject: [PATCH 167/169] fix for using override confirm mode 0 evaluating to false/none --- src/cnaas_nms/devicehandler/sync_devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/devicehandler/sync_devices.py 
b/src/cnaas_nms/devicehandler/sync_devices.py index 58050d35..0ab32ecc 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -368,7 +368,7 @@ def populate_device_vars( def get_confirm_mode(confirm_mode_override: Optional[int] = None) -> int: valid_modes = [0, 1, 2] - if confirm_mode_override and confirm_mode_override in valid_modes: + if confirm_mode_override is not None and confirm_mode_override in valid_modes: return confirm_mode_override elif api_settings.COMMIT_CONFIRMED_MODE and api_settings.COMMIT_CONFIRMED_MODE in valid_modes: return api_settings.COMMIT_CONFIRMED_MODE From a50a3f87cecc5088482398a7709b21188c62c44d Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Fri, 22 Sep 2023 14:55:42 +0200 Subject: [PATCH 168/169] Add support for IP settings in interfaces.yml --- docs/reporef/index.rst | 8 ++++++++ src/cnaas_nms/db/settings_fields.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/docs/reporef/index.rst b/docs/reporef/index.rst index b295d1d0..4296db4b 100644 --- a/docs/reporef/index.rst +++ b/docs/reporef/index.rst @@ -404,6 +404,14 @@ Keys for interfaces.yml or interfaces_.yml: * aggregate_id: Optional. Identifier for configuring LACP etc. Integer value. Special value -1 means configure MLAG and use ID based on indexnum. + * tags: Optional list of strings, custom user defined tags to apply. + * vrf: Optional VRF instance, must be specified if IP address is specified + * ipv4_address: Optional IPv4 address for the interface + * ipv6_address: Optional IPv6 address for the interface + * mtu: Optional integer specifying MTU size + * acl_ipv4_in: Access control list to apply for ingress IPv4 traffic to interface. Optional. + * acl_ipv4_out: Access control list to apply for egress IPv4 traffic from interface. Optional. + * acl_ipv6_in: Access control list to apply for ingress IPv6 traffic to interface. Optional. 
+ * acl_ipv6_out: Access control list to apply for egress IPv6 traffic from interface. Optional. * cli_append_str: Optional. Custom configuration to append to this interface. The "downlink" ifclass is used on DIST devices to specify that this interface diff --git a/src/cnaas_nms/db/settings_fields.py b/src/cnaas_nms/db/settings_fields.py index aafa9a8f..5ebcd5d6 100644 --- a/src/cnaas_nms/db/settings_fields.py +++ b/src/cnaas_nms/db/settings_fields.py @@ -139,8 +139,24 @@ class f_interface(BaseModel): tagged_vlan_list: Optional[List[int]] = None aggregate_id: Optional[int] = None tags: Optional[List[str]] = None + vrf: Optional[str] = vlan_name_schema + ipv4_address: Optional[str] = None + ipv6_address: Optional[str] = ipv6_if_schema + mtu: Optional[int] = mtu_schema + acl_ipv4_in: Optional[str] = None + acl_ipv4_out: Optional[str] = None + acl_ipv6_in: Optional[str] = None + acl_ipv6_out: Optional[str] = None cli_append_str: str = "" + @validator("ipv4_address") + def vrf_required_if_ipv4_address_set(cls, v, values, **kwargs): + if v: + validate_ipv4_if(v) + if "vrf" not in values or not values["vrf"]: + raise ValueError("VRF is required when specifying ipv4_gw") + return v + @validator("tagged_vlan_list", each_item=True) def check_valid_vlan_ids(cls, v): assert 0 < v < 4096 From dd461cd69ea3a400b20365a4500bb9ff0c331418 Mon Sep 17 00:00:00 2001 From: Johan Marcusson Date: Mon, 25 Sep 2023 09:38:15 +0200 Subject: [PATCH 169/169] Version 1.5.0 --- src/cnaas_nms/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cnaas_nms/version.py b/src/cnaas_nms/version.py index 4ee2c0b0..4f208dc3 100644 --- a/src/cnaas_nms/version.py +++ b/src/cnaas_nms/version.py @@ -1,3 +1,3 @@ -__version__ = "1.5.0b1" +__version__ = "1.5.0" __version_info__ = tuple([field for field in __version__.split(".")]) __api_version__ = "v1.0"