Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix wait for idle on slow network #183

Merged
merged 5 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion cou/steps/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from cou.steps import PreUpgradeStep, UpgradePlan
from cou.steps.analyze import Analysis
from cou.steps.backup import backup
from cou.utils.juju_utils import DEFAULT_TIMEOUT
from cou.utils.openstack import LTS_TO_OS_RELEASE, OpenStackRelease

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,7 +78,16 @@ async def generate_plan(analysis_result: Analysis, backup_database: bool) -> Upg
PreUpgradeStep(
description="Verify that all OpenStack applications are in idle state",
parallel=False,
coro=analysis_result.model.wait_for_idle(timeout=11, idle_period=10),
coro=analysis_result.model.wait_for_idle(
# NOTE (rgildein): We need to DEFAULT_TIMEOUT so it's possible to change if
# a network is too slow, this could cause an issue.
# Since the default timeout is 10 seconds, we need to add 1 second to it to make
# sure that timeout is greater than idle_period. At the same time, we are using max
# function to ensure lower timeout used it here is always at least 11.
agileshaw marked this conversation as resolved.
Show resolved Hide resolved
timeout=max(DEFAULT_TIMEOUT + 1, 11),
idle_period=10,
raise_on_blocked=True,
),
)
)
if backup_database:
Expand Down
22 changes: 17 additions & 5 deletions cou/utils/juju_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from juju.client._definitions import FullStatus
from juju.client.connector import NoConnectionException
from juju.client.jujudata import FileJujuData
from juju.errors import JujuError
from juju.errors import JujuAppError, JujuError, JujuUnitError
from juju.model import Model
from juju.unit import Unit
from macaroonbakery.httpbakery import BakeryException
Expand Down Expand Up @@ -450,6 +450,7 @@ async def wait_for_idle(
timeout: int,
idle_period: int = DEFAULT_MODEL_IDLE_PERIOD,
apps: Optional[list[str]] = None,
raise_on_blocked: bool = False,
) -> None:
"""Wait for applications to reach an idle state.

Expand All @@ -464,6 +465,9 @@ async def wait_for_idle(
:type idle_period: int
:param apps: Applications to wait, defaults to None
:type apps: Optional[list[str]], optional
:param raise_on_blocked: If any unit or app going into "blocked" status immediately raises
gabrielcocenza marked this conversation as resolved.
Show resolved Hide resolved
WaitForApplicationsTimeout.
gabrielcocenza marked this conversation as resolved.
Show resolved Hide resolved
:type raise_on_blocked: bool
"""

@retry(timeout=timeout, no_retry_exceptions=(WaitForApplicationsTimeout,))
Expand All @@ -472,14 +476,22 @@ async def _wait_for_idle() -> None:
# NOTE(rgildein): Defining wrapper so we can use retry with proper timeout
model = await self._get_model()
try:
await model.wait_for_idle(apps=apps, timeout=timeout, idle_period=idle_period)
except asyncio.exceptions.TimeoutError as error:
await model.wait_for_idle(
apps=apps,
timeout=timeout,
idle_period=idle_period,
raise_on_blocked=raise_on_blocked,
agileshaw marked this conversation as resolved.
Show resolved Hide resolved
)
except (asyncio.exceptions.TimeoutError, JujuAppError, JujuUnitError) as error:
# NOTE(rgildein): Catching TimeoutError raised as exception when wait_for_idle
# reached timeout. Also adding two spaces to make it more user friendly.
# example:
# Timed out waiting for model:
# nova-compute/0 [executing] maintenance: Installing packages
msg = str(error).replace("\n", "\n ")
# rabbitmq-server/0 [idle] active: Unit is ready
# neutron-api/0 [idle] active: Unit is ready
# glance/0 [idle] active: Unit is ready
# cinder/0 [idle] active: Unit is ready
msg = str(error).replace("\n", "\n ", 1)
raise WaitForApplicationsTimeout(msg) from error

if apps is None:
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/steps/test_steps_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ async def test_generate_plan(apps, model):
PreUpgradeStep(
description="Verify that all OpenStack applications are in idle state",
parallel=False,
coro=analysis_result.model.wait_for_idle(timeout=11, idle_period=10),
coro=analysis_result.model.wait_for_idle(
timeout=11, idle_period=10, raise_on_blocked=True
),
)
)
expected_plan.add_step(
Expand Down
15 changes: 12 additions & 3 deletions tests/unit/utils/test_juju_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,10 @@ async def test_coumodel_wait_for_idle(mock_get_supported_apps, mocked_model):
await model.wait_for_idle(timeout)

mocked_model.wait_for_idle.assert_awaited_once_with(
apps=["app1", "app2"], timeout=timeout, idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD
apps=["app1", "app2"],
timeout=timeout,
idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD,
raise_on_blocked=False,
)
mock_get_supported_apps.assert_awaited_once_with()

Expand All @@ -497,7 +500,10 @@ async def test_coumodel_wait_for_idle_apps(mock_get_supported_apps, mocked_model
await model.wait_for_idle(timeout, apps=["app1"])

mocked_model.wait_for_idle.assert_awaited_once_with(
apps=["app1"], timeout=timeout, idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD
apps=["app1"],
timeout=timeout,
idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD,
raise_on_blocked=False,
)
mock_get_supported_apps.assert_not_awaited()

Expand All @@ -515,6 +521,9 @@ async def test_coumodel_wait_for_idle_timeout(mock_get_supported_apps, mocked_mo
await model.wait_for_idle(timeout, apps=exp_apps)

mocked_model.wait_for_idle.assert_awaited_once_with(
apps=exp_apps, timeout=timeout, idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD
apps=exp_apps,
timeout=timeout,
idle_period=juju_utils.DEFAULT_MODEL_IDLE_PERIOD,
raise_on_blocked=False,
)
mock_get_supported_apps.assert_not_awaited()
Loading