From 8f1300cda5ce0757cb0b6719243cd1917133164f Mon Sep 17 00:00:00 2001 From: Gabriel Cocenza Date: Thu, 18 Apr 2024 10:41:09 -0300 Subject: [PATCH] Increase Timeout (#377) - During partner cloud upgrade it was detected that COU was failing to check if the whole model is active idle because the default timeout is not enough. - Keystone also needs some extra time, so this change increases the long idle timeout from 30 to 40 minutes. --- README.md | 2 +- cou/apps/base.py | 2 +- cou/steps/plan.py | 4 ++-- docs/how-to/interruption.rst | 2 +- docs/how-to/no-backup.rst | 2 +- docs/how-to/plan-upgrade.rst | 4 ++-- docs/how-to/upgrade-cloud.rst | 6 +++--- docs/reference/environment-variables.rst | 2 +- tests/functional/tests/smoke.py | 2 +- tests/mocked_plans/sample_plans/base.yaml | 4 ++-- tests/unit/apps/test_auxiliary.py | 24 +++++++++++------------ tests/unit/apps/test_core.py | 24 +++++++++++------------ tests/unit/steps/test_hypervisor.py | 10 +++++----- tests/unit/steps/test_plan.py | 12 ++++++------ 14 files changed, 50 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index c8b47ce7..f0ed1f7e 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Commands: - `COU_MODEL_RETRIES` - define how many times to retry the connection to Juju model before giving up. Default value is 5 times. - `COU_MODEL_RETRY_BACKOFF` - define number of seconds to increase the wait between connection to the Juju model retry attempts. Default value is 2 seconds. - `COU_STANDARD_IDLE_TIMEOUT` - how long COU will wait for an application to settle to active/idle and declare the upgrade complete. The default value is 300 seconds. -- `COU_LONG_IDLE_TIMEOUT` - a longer version of COU_STANDARD_IDLE_TIMEOUT for applications that are known to need more time than usual to upgrade like such as Keystone and Octavia. The default value is 1800 seconds. 
+- `COU_LONG_IDLE_TIMEOUT` - a longer version of COU_STANDARD_IDLE_TIMEOUT for applications that are known to need more time than usual to upgrade like such as Keystone and Octavia. The default value is 2400 seconds. ## Supported Upgrade Paths diff --git a/cou/apps/base.py b/cou/apps/base.py index d0020fb8..1a3473e7 100644 --- a/cou/apps/base.py +++ b/cou/apps/base.py @@ -48,7 +48,7 @@ STANDARD_IDLE_TIMEOUT: int = int( os.environ.get("COU_STANDARD_IDLE_TIMEOUT", 5 * 60) ) # default of 5 min -LONG_IDLE_TIMEOUT: int = int(os.environ.get("COU_LONG_IDLE_TIMEOUT", 30 * 60)) # default of 30 min +LONG_IDLE_TIMEOUT: int = int(os.environ.get("COU_LONG_IDLE_TIMEOUT", 40 * 60)) # default of 40 min ORIGIN_SETTINGS = ("openstack-origin", "source") REQUIRED_SETTINGS = ("enable-auto-restarts", "action-managed-upgrade", *ORIGIN_SETTINGS) LATEST_STABLE = "latest/stable" diff --git a/cou/steps/plan.py b/cou/steps/plan.py index f03945e4..46f6ed45 100644 --- a/cou/steps/plan.py +++ b/cou/steps/plan.py @@ -369,9 +369,9 @@ def _get_pre_upgrade_steps(analysis_result: Analysis, args: CLIargs) -> list[Pre coro=analysis_result.model.wait_for_active_idle( # NOTE (rgildein): We need to DEFAULT_TIMEOUT so it's possible to change if # a network is too slow, this could cause an issue. - # We are using max function to ensure timeout is always at least 11 (1 second + # We are using max function to ensure timeout is always at least 120 (110 seconds # higher than the idle_period to prevent false negative). 
- timeout=max(DEFAULT_TIMEOUT + 1, 11), + timeout=max(DEFAULT_TIMEOUT, 120), idle_period=10, raise_on_blocked=True, ), diff --git a/docs/how-to/interruption.rst b/docs/how-to/interruption.rst index 587b0256..10f63a5b 100644 --- a/docs/how-to/interruption.rst +++ b/docs/how-to/interruption.rst @@ -33,7 +33,7 @@ Usage example: Upgrade software packages on unit 'keystone/2' Upgrade 'keystone' to the new channel: 'victoria/stable' Change charm config of 'keystone' 'openstack-origin' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test-model' to reach the idle state + Wait for up to 2400s for model 'test-model' to reach the idle state Verify that the workload of 'keystone' has been upgraded Would you like to start the upgrade? Continue (y/N): n diff --git a/docs/how-to/no-backup.rst b/docs/how-to/no-backup.rst index 456ba184..774f29fe 100644 --- a/docs/how-to/no-backup.rst +++ b/docs/how-to/no-backup.rst @@ -49,7 +49,7 @@ Upgrade: Upgrade software packages on unit 'rabbitmq-server/2' Upgrade 'rabbitmq-server' to the new channel: '3.9/stable' Change charm config of 'rabbitmq-server' 'source' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test-model' to reach the idle state + Wait for up to 2400s for model 'test-model' to reach the idle state Verify that the workload of 'rabbitmq-server' has been upgraded Continue (y/n): y diff --git a/docs/how-to/plan-upgrade.rst b/docs/how-to/plan-upgrade.rst index 63ba94f8..87736caf 100644 --- a/docs/how-to/plan-upgrade.rst +++ b/docs/how-to/plan-upgrade.rst @@ -37,7 +37,7 @@ Output example Change charm config of 'keystone' 'action-managed-upgrade' to 'False' Upgrade 'keystone' to the new channel: 'victoria/stable' Change charm config of 'keystone' 'openstack-origin' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'keystone' has been upgraded on units: keystone/0 
Control Plane subordinate(s) upgrade plan Upgrade plan for 'keystone-ldap' to 'victoria' @@ -59,7 +59,7 @@ Output example ├── Upgrade the unit: 'nova-compute/0' ├── Resume the unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0 Remaining Data Plane principal(s) upgrade plan Upgrade plan for 'ceph-osd' to 'victoria' diff --git a/docs/how-to/upgrade-cloud.rst b/docs/how-to/upgrade-cloud.rst index c6befda7..f2c7e43c 100644 --- a/docs/how-to/upgrade-cloud.rst +++ b/docs/how-to/upgrade-cloud.rst @@ -130,7 +130,7 @@ Usage example Upgrade software packages on unit 'rabbitmq-server/2' Upgrade 'rabbitmq-server' to the new channel: '3.9/stable' Change charm config of 'rabbitmq-server' 'source' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test-model' to reach the idle state + Wait for up to 2400s for model 'test-model' to reach the idle state Verify that the workload of 'rabbitmq-server' has been upgraded ... Would you like to start the upgrade? 
Continue (y/N): y @@ -145,7 +145,7 @@ Usage example Upgrade software packages on unit 'rabbitmq-server/2' Upgrade 'rabbitmq-server' to the new channel: '3.9/stable' Change charm config of 'rabbitmq-server' 'source' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test-model' to reach the idle state + Wait for up to 2400s for model 'test-model' to reach the idle state Verify that the workload of 'rabbitmq-server' has been upgraded Continue (y/n): y @@ -158,7 +158,7 @@ Usage example Upgrade software packages on unit 'keystone/2' Upgrade 'keystone' to the new channel: 'victoria/stable' Change charm config of 'keystone' 'openstack-origin' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test-model' to reach the idle state + Wait for up to 2400s for model 'test-model' to reach the idle state Verify that the workload of 'keystone' has been upgraded Continue (y/n): y diff --git a/docs/reference/environment-variables.rst b/docs/reference/environment-variables.rst index ca875bee..b6a04158 100644 --- a/docs/reference/environment-variables.rst +++ b/docs/reference/environment-variables.rst @@ -14,4 +14,4 @@ Environment Variables to **active/idle** and declare the upgrade complete. The default value is 300 seconds. * **COU_LONG_IDLE_TIMEOUT** - a longer version of **COU_STANDARD_IDLE_TIMEOUT** for applications that are known to need more time than usual to upgrade, such as Keystone and Octavia. The - default value is 1800 seconds. + default value is 2400 seconds. 
diff --git a/tests/functional/tests/smoke.py b/tests/functional/tests/smoke.py index 80859634..ee3ed61a 100644 --- a/tests/functional/tests/smoke.py +++ b/tests/functional/tests/smoke.py @@ -153,7 +153,7 @@ def generate_expected_plan(self, backup: bool = True) -> str: "\t\t\t\tUpgrade software packages on unit 'mysql-innodb-cluster/2'\n" "\t\t\tChange charm config of 'mysql-innodb-cluster' 'source' to " "'cloud:focal-victoria'\n" - "\t\t\tWait for up to 1800s for app 'mysql-innodb-cluster' to reach the idle state\n" + "\t\t\tWait for up to 2400s for app 'mysql-innodb-cluster' to reach the idle state\n" "\t\t\tVerify that the workload of 'mysql-innodb-cluster' has been upgraded on units: " "mysql-innodb-cluster/0, mysql-innodb-cluster/1, mysql-innodb-cluster/2\n" ) diff --git a/tests/mocked_plans/sample_plans/base.yaml b/tests/mocked_plans/sample_plans/base.yaml index ff37be8d..0dce9bb7 100644 --- a/tests/mocked_plans/sample_plans/base.yaml +++ b/tests/mocked_plans/sample_plans/base.yaml @@ -10,7 +10,7 @@ plan: | Change charm config of 'keystone' 'action-managed-upgrade' to 'False' Upgrade 'keystone' to the new channel: 'victoria/stable' Change charm config of 'keystone' 'openstack-origin' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'base' to reach the idle state + Wait for up to 2400s for model 'base' to reach the idle state Verify that the workload of 'keystone' has been upgraded on units: keystone/0 Control Plane subordinate(s) upgrade plan Upgrade plan for 'keystone-ldap' to 'victoria' @@ -32,7 +32,7 @@ plan: | ├── Upgrade the unit: 'nova-compute/0' ├── Resume the unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'base' to reach the idle state + Wait for up to 2400s for model 'base' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0 Remaining Data Plane principal(s) upgrade plan Upgrade plan for 'ceph-osd' to 'victoria' diff 
--git a/tests/unit/apps/test_auxiliary.py b/tests/unit/apps/test_auxiliary.py index ddbe4618..8d6bee41 100644 --- a/tests/unit/apps/test_auxiliary.py +++ b/tests/unit/apps/test_auxiliary.py @@ -168,9 +168,9 @@ def test_auxiliary_upgrade_plan_ussuri_to_victoria_change_channel(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -238,9 +238,9 @@ def test_auxiliary_upgrade_plan_ussuri_to_victoria(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -314,9 +314,9 @@ def test_auxiliary_upgrade_plan_ussuri_to_victoria_ch_migration(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -694,9 +694,9 @@ def test_ceph_mon_upgrade_plan_xena_to_yoga(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", 
parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -772,9 +772,9 @@ def test_ceph_mon_upgrade_plan_ussuri_to_victoria(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -1119,9 +1119,9 @@ def test_mysql_innodb_cluster_upgrade(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for app '{app.name}' to reach the idle state", + description=f"Wait for up to 2400s for app '{app.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=[app.name]), + coro=model.wait_for_active_idle(2400, apps=[app.name]), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " diff --git a/tests/unit/apps/test_core.py b/tests/unit/apps/test_core.py index 7ad69d23..05df3af0 100644 --- a/tests/unit/apps/test_core.py +++ b/tests/unit/apps/test_core.py @@ -230,9 +230,9 @@ def test_upgrade_plan_ussuri_to_victoria(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -314,9 +314,9 @@ def test_upgrade_plan_ussuri_to_victoria_ch_migration(model): ), ), PostUpgradeStep( - 
description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -392,9 +392,9 @@ def test_upgrade_plan_channel_on_next_os_release(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -471,9 +471,9 @@ def test_upgrade_plan_origin_already_on_next_openstack_release(model): coro=model.upgrade_charm(app.name, "victoria/stable"), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -588,9 +588,9 @@ def test_upgrade_plan_application_already_disable_action_managed(model): ), ), PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ), PostUpgradeStep( description=f"Verify that the workload of '{app.name}' has been upgraded on units: " @@ -809,7 
+809,7 @@ def test_nova_compute_upgrade_plan(model): Enable nova-compute scheduler from unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/1' Enable nova-compute scheduler from unit: 'nova-compute/2' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0, nova-compute/1, nova-compute/2 """ # noqa: E501 line too long ) @@ -862,7 +862,7 @@ def test_nova_compute_upgrade_plan_single_unit(model): ├── Upgrade the unit: 'nova-compute/0' ├── Resume the unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0 """ ) diff --git a/tests/unit/steps/test_hypervisor.py b/tests/unit/steps/test_hypervisor.py index 418eb230..f9fbcc93 100644 --- a/tests/unit/steps/test_hypervisor.py +++ b/tests/unit/steps/test_hypervisor.py @@ -420,7 +420,7 @@ def test_hypervisor_upgrade_plan(model): Wait for up to 300s for app 'cinder' to reach the idle state Verify that the workload of 'cinder' has been upgraded on units: cinder/0 Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0 Upgrade plan for 'az-1' to 'victoria' Disable nova-compute scheduler from unit: 'nova-compute/1' @@ -437,7 +437,7 @@ def test_hypervisor_upgrade_plan(model): ├── Upgrade the unit: 'nova-compute/1' ├── Resume the unit: 'nova-compute/1' Enable nova-compute scheduler from unit: 'nova-compute/1' - Wait for up to 1800s for model 'test_model' to reach the idle 
state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/1 Upgrade plan for 'az-2' to 'victoria' Disable nova-compute scheduler from unit: 'nova-compute/2' @@ -454,7 +454,7 @@ def test_hypervisor_upgrade_plan(model): ├── Upgrade the unit: 'nova-compute/2' ├── Resume the unit: 'nova-compute/2' Enable nova-compute scheduler from unit: 'nova-compute/2' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/2 """ ) @@ -547,7 +547,7 @@ def test_hypervisor_upgrade_plan_single_machine(model): Wait for up to 300s for app 'cinder' to reach the idle state Verify that the workload of 'cinder' has been upgraded on units: cinder/0 Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/0 """ ) @@ -639,7 +639,7 @@ def test_hypervisor_upgrade_plan_some_units_upgraded(model): Wait for up to 300s for app 'cinder' to reach the idle state Verify that the workload of 'cinder' has been upgraded on units: cinder/2 Enable nova-compute scheduler from unit: 'nova-compute/2' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: nova-compute/2 """ ) diff --git a/tests/unit/steps/test_plan.py b/tests/unit/steps/test_plan.py index 27aff79e..945f3d63 100644 --- a/tests/unit/steps/test_plan.py +++ b/tests/unit/steps/test_plan.py @@ -56,9 +56,9 @@ def generate_expected_upgrade_plan_principal(app, target, model): if app.charm in ["rabbitmq-server", 
"ceph-mon", "keystone"]: # apps waiting for whole model wait_step = PostUpgradeStep( - description=f"Wait for up to 1800s for model '{model.name}' to reach the idle state", + description=f"Wait for up to 2400s for model '{model.name}' to reach the idle state", parallel=False, - coro=model.wait_for_active_idle(1800, apps=None), + coro=model.wait_for_active_idle(2400, apps=None), ) else: wait_step = PostUpgradeStep( @@ -155,7 +155,7 @@ async def test_generate_plan(mock_filter_hypervisors, model, cli_args): Change charm config of 'keystone' 'action-managed-upgrade' to 'False' Upgrade 'keystone' to the new channel: 'victoria/stable' Change charm config of 'keystone' 'openstack-origin' to 'cloud:focal-victoria' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'keystone' has been upgraded on units: keystone/0 Control Plane subordinate(s) upgrade plan Upgrade plan for 'keystone-ldap' to 'victoria' @@ -177,7 +177,7 @@ async def test_generate_plan(mock_filter_hypervisors, model, cli_args): ├── Upgrade the unit: 'nova-compute/0' ├── Resume the unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: \ nova-compute/0 Remaining Data Plane principal(s) upgrade plan @@ -333,7 +333,7 @@ async def test_generate_plan_with_warning_messages(mock_filter_hypervisors, mode ├── Upgrade the unit: 'nova-compute/0' ├── Resume the unit: 'nova-compute/0' Enable nova-compute scheduler from unit: 'nova-compute/0' - Wait for up to 1800s for model 'test_model' to reach the idle state + Wait for up to 2400s for model 'test_model' to reach the idle state Verify that the workload of 'nova-compute' has been upgraded on units: \ nova-compute/0 Remaining 
Data Plane principal(s) upgrade plan @@ -1020,7 +1020,7 @@ def test_get_pre_upgrade_steps(cli_backup, cli_args, model): description="Verify that all OpenStack applications are in idle state", parallel=False, coro=mock_analysis_result.model.wait_for_active_idle( - timeout=11, idle_period=10, raise_on_blocked=True + timeout=120, idle_period=10, raise_on_blocked=True ), ) )