Skip to content

Commit

Permalink
refactor(protocol-engine): Keep track of failed commands' error recov…
Browse files Browse the repository at this point in the history
…ery types (#14795)
  • Loading branch information
SyntaxColoring authored Apr 4, 2024
1 parent eecf117 commit f95af4f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 6 deletions.
32 changes: 30 additions & 2 deletions api/src/opentrons/protocol_engine/state/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import enum
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union
from typing_extensions import assert_never

from opentrons_shared_data.errors import EnumeratedError, ErrorCodes, PythonException
Expand Down Expand Up @@ -164,6 +164,19 @@ class CommandState:
# that we're doing error recovery. See if we can implement robot-server pagination
# atop simpler concepts, like "the last command that ran" or "the next command that
# would run."
#
# TODO(mm, 2024-04-03): Can this be replaced by
# CommandHistory.get_terminal_command() now?

command_error_recovery_types: Dict[str, ErrorRecoveryType]
"""For each command that failed (indexed by ID), what its recovery type was.
This only includes commands that actually failed, not the ones that we mark as
failed but that are effectively "cancelled" because a command before them failed.
This separate attribute is a stopgap until error recovery concepts are a bit more
stable. Eventually, we might want this info to be stored directly on each command.
"""

finish_error: Optional[ErrorOccurrence]
"""The error that happened during the post-run finish steps (homing & dropping tips), if any."""
Expand Down Expand Up @@ -199,6 +212,7 @@ def __init__(
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_completed_at=None,
run_started_at=None,
latest_command_hash=None,
Expand Down Expand Up @@ -253,11 +267,11 @@ def handle_action(self, action: Action) -> None: # noqa: C901
error=action.error,
)

# TODO(mc, 2022-06-06): add new "cancelled" status or similar
self._update_to_failed(
command_id=action.command_id,
failed_at=action.failed_at,
error_occurrence=error_occurrence,
error_recovery_type=action.type,
notes=action.notes,
)

Expand All @@ -271,10 +285,12 @@ def handle_action(self, action: Action) -> None: # noqa: C901
self._state.command_history.get_setup_queue_ids()
)
for command_id in other_command_ids_to_fail:
# TODO(mc, 2022-06-06): add new "cancelled" status or similar
self._update_to_failed(
command_id=command_id,
failed_at=action.failed_at,
error_occurrence=None,
error_recovery_type=None,
notes=None,
)
self._state.command_history.clear_setup_queue()
Expand All @@ -289,10 +305,12 @@ def handle_action(self, action: Action) -> None: # noqa: C901
self._state.command_history.get_queue_ids()
)
for command_id in other_command_ids_to_fail:
# TODO(mc, 2022-06-06): add new "cancelled" status or similar
self._update_to_failed(
command_id=command_id,
failed_at=action.failed_at,
error_occurrence=None,
error_recovery_type=None,
notes=None,
)
self._state.command_history.clear_queue()
Expand Down Expand Up @@ -376,6 +394,7 @@ def _update_to_failed(
command_id: str,
failed_at: datetime,
error_occurrence: Optional[ErrorOccurrence],
error_recovery_type: Optional[ErrorRecoveryType],
notes: Optional[List[CommandNote]],
) -> None:
prev_entry = self._state.command_history.get(command_id)
Expand All @@ -391,6 +410,8 @@ def _update_to_failed(
}
)
self._state.command_history.set_command_failed(failed_command)
if error_recovery_type is not None:
self._state.command_error_recovery_types[command_id] = error_recovery_type

@staticmethod
def _map_run_exception_to_error_occurrence(
Expand Down Expand Up @@ -709,6 +730,13 @@ def raise_fatal_command_error(self) -> None:
message=failed_command.command.error.detail,
)

def get_error_recovery_type(self, command_id: str) -> ErrorRecoveryType:
"""Return the error recovery type with which the given command failed.
The command ID is assumed to point to a failed command.
"""
return self.state.command_error_recovery_types[command_id]

def get_is_stopped(self) -> bool:
"""Get whether an engine stop has completed."""
return self._state.run_completed_at is not None
Expand Down
79 changes: 79 additions & 0 deletions api/tests/opentrons/protocol_engine/state/test_command_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Tests for the CommandStore+CommandState+CommandView trifecta.
The trifecta is tested here as a single unit, treating CommandState as a private
implementation detail.
"""

from datetime import datetime

from opentrons_shared_data.errors import PythonException

from opentrons.protocol_engine import actions, commands
from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryType
from opentrons.protocol_engine.state.commands import CommandStore, CommandView
from opentrons.protocol_engine.state.config import Config
from opentrons.protocol_engine.types import DeckType


def _make_config() -> Config:
return Config(
# Choice of robot and deck type is arbitrary.
robot_type="OT-2 Standard",
deck_type=DeckType.OT2_STANDARD,
)


def test_error_recovery_type_tracking() -> None:
"""It should keep track of each failed command's error recovery type."""
subject = CommandStore(config=_make_config(), is_door_open=False)

subject.handle_action(
actions.QueueCommandAction(
command_id="c1",
created_at=datetime.now(),
request=commands.CommentCreate(
params=commands.CommentParams(message="yeehaw"),
),
request_hash=None,
)
)
subject.handle_action(
actions.QueueCommandAction(
command_id="c2",
created_at=datetime.now(),
request=commands.CommentCreate(
params=commands.CommentParams(message="yeehaw"),
),
request_hash=None,
)
)
subject.handle_action(
actions.RunCommandAction(command_id="c1", started_at=datetime.now())
)
subject.handle_action(
actions.FailCommandAction(
command_id="c1",
error_id="c1-error",
failed_at=datetime.now(),
error=PythonException(RuntimeError("new sheriff in town")),
notes=[],
type=ErrorRecoveryType.WAIT_FOR_RECOVERY,
)
)
subject.handle_action(
actions.RunCommandAction(command_id="c2", started_at=datetime.now())
)
subject.handle_action(
actions.FailCommandAction(
command_id="c2",
error_id="c2-error",
failed_at=datetime.now(),
error=PythonException(RuntimeError("new sheriff in town")),
notes=[],
type=ErrorRecoveryType.FAIL_RUN,
)
)

view = CommandView(subject.state)
assert view.get_error_recovery_type("c1") == ErrorRecoveryType.WAIT_FOR_RECOVERY
assert view.get_error_recovery_type("c2") == ErrorRecoveryType.FAIL_RUN
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Tests for the command lifecycle state."""
"""Tests for CommandStore.
DEPRECATED: Testing CommandStore independently of CommandView is no longer helpful.
Add new tests to test_command_state.py, where they can be tested together.
"""


import pytest
from datetime import datetime
from typing import NamedTuple, Type
Expand Down Expand Up @@ -79,6 +85,7 @@ def test_initial_state(
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
latest_command_hash=None,
stopped_by_estop=False,
)
Expand Down Expand Up @@ -826,6 +833,7 @@ def test_command_store_handles_pause_action(pause_source: PauseSource) -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
latest_command_hash=None,
stopped_by_estop=False,
)
Expand All @@ -850,6 +858,7 @@ def test_command_store_handles_play_action(pause_source: PauseSource) -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=datetime(year=2021, month=1, day=1),
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -880,6 +889,7 @@ def test_command_store_handles_finish_action() -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=datetime(year=2021, month=1, day=1),
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -925,6 +935,7 @@ def test_command_store_handles_stop_action(from_estop: bool) -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=datetime(year=2021, month=1, day=1),
latest_command_hash=None,
stopped_by_estop=from_estop,
Expand Down Expand Up @@ -954,6 +965,7 @@ def test_command_store_cannot_restart_after_should_stop() -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=None,
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -1085,6 +1097,7 @@ def test_command_store_wraps_unknown_errors() -> None:
),
run_started_at=None,
failed_command=None,
command_error_recovery_types={},
latest_command_hash=None,
stopped_by_estop=False,
)
Expand Down Expand Up @@ -1145,6 +1158,7 @@ def __init__(self, message: str) -> None:
errorCode=ErrorCodes.PIPETTE_NOT_PRESENT.value.code,
),
failed_command=None,
command_error_recovery_types={},
run_started_at=None,
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -1176,6 +1190,7 @@ def test_command_store_ignores_stop_after_graceful_finish() -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=datetime(year=2021, month=1, day=1),
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -1207,6 +1222,7 @@ def test_command_store_ignores_finish_after_non_graceful_stop() -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=datetime(year=2021, month=1, day=1),
latest_command_hash=None,
stopped_by_estop=False,
Expand All @@ -1219,6 +1235,8 @@ def test_command_store_ignores_finish_after_non_graceful_stop() -> None:

def test_command_store_handles_command_failed() -> None:
"""It should store an error and mark the command if it fails."""
error_recovery_type = ErrorRecoveryType.FAIL_RUN

expected_error_occurrence = errors.ErrorOccurrence(
id="error-id",
errorType="ProtocolEngineError",
Expand Down Expand Up @@ -1281,7 +1299,7 @@ def test_command_store_handles_command_failed() -> None:
source="source",
)
],
type=ErrorRecoveryType.FAIL_RUN,
type=error_recovery_type,
)
)

Expand All @@ -1299,6 +1317,7 @@ def test_command_store_handles_command_failed() -> None:
run_error=None,
finish_error=None,
failed_command=failed_command_entry,
command_error_recovery_types={expected_failed_command.id: error_recovery_type},
run_started_at=None,
latest_command_hash=None,
stopped_by_estop=False,
Expand Down Expand Up @@ -1327,6 +1346,7 @@ def test_handles_hardware_stopped() -> None:
run_error=None,
finish_error=None,
failed_command=None,
command_error_recovery_types={},
run_started_at=None,
latest_command_hash=None,
stopped_by_estop=False,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
"""Labware state store tests."""
"""Tests for CommandView.
DEPRECATED: Testing CommandView independently of CommandStore is no longer helpful.
Add new tests to test_command_state.py, where they can be tested together.
"""


import pytest
from contextlib import nullcontext as does_not_raise
from datetime import datetime
from typing import List, NamedTuple, Optional, Sequence, Type, Union
from typing import Dict, List, NamedTuple, Optional, Sequence, Type, Union

from opentrons.protocol_engine import EngineStatus, commands as cmd, errors
from opentrons.protocol_engine.actions import (
Expand All @@ -14,6 +20,7 @@
)
from opentrons.protocol_engine.actions.actions import ResumeFromRecoveryAction

from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryType
from opentrons.protocol_engine.state.commands import (
CommandState,
CommandView,
Expand Down Expand Up @@ -50,6 +57,7 @@ def get_command_view(
queued_setup_command_ids: Sequence[str] = (),
run_error: Optional[errors.ErrorOccurrence] = None,
failed_command: Optional[CommandEntry] = None,
command_error_recovery_types: Optional[Dict[str, ErrorRecoveryType]] = None,
finish_error: Optional[errors.ErrorOccurrence] = None,
commands: Sequence[cmd.Command] = (),
latest_command_hash: Optional[str] = None,
Expand Down Expand Up @@ -81,6 +89,7 @@ def get_command_view(
run_error=run_error,
finish_error=finish_error,
failed_command=failed_command,
command_error_recovery_types=command_error_recovery_types or {},
run_started_at=run_started_at,
latest_command_hash=latest_command_hash,
stopped_by_estop=False,
Expand Down

0 comments on commit f95af4f

Please sign in to comment.