Explicitly keep track of the current recovery target.

Opentrons · Apr 8, 2024 · a880096 · a880096
1 parent bd14851
commit a880096
Show file tree

Hide file tree

Showing 5 changed files with 108 additions and 40 deletions.
diff --git a/api/src/opentrons/protocol_engine/actions/__init__.py b/api/src/opentrons/protocol_engine/actions/__init__.py
@@ -11,6 +11,7 @@
     PauseAction,
     PauseSource,
     StopAction,
+    ResumeFromRecoveryAction,
     FinishAction,
     HardwareStoppedAction,
     QueueCommandAction,
@@ -38,6 +39,7 @@
     "PlayAction",
     "PauseAction",
     "StopAction",
+    "ResumeFromRecoveryAction",
     "FinishAction",
     "HardwareStoppedAction",
     "QueueCommandAction",

diff --git a/api/src/opentrons/protocol_engine/state/commands.py b/api/src/opentrons/protocol_engine/state/commands.py
@@ -178,6 +178,9 @@ class CommandState:
     stable. Eventually, we might want this info to be stored directly on each command.
     """
 
+    recovery_target_command_id: Optional[str]
+    """If we're currently recovering from a command failure, which command it was."""
+
     finish_error: Optional[ErrorOccurrence]
     """The error that happened during the post-run finish steps (homing & dropping tips), if any."""
 
@@ -213,6 +216,7 @@ def __init__(
             finish_error=None,
             failed_command=None,
             command_error_recovery_types={},
+            recovery_target_command_id=None,
             run_completed_at=None,
             run_started_at=None,
             latest_command_hash=None,
@@ -300,6 +304,7 @@ def handle_action(self, action: Action) -> None:  # noqa: C901
             ):
                 if action.type == ErrorRecoveryType.WAIT_FOR_RECOVERY:
                     self._state.queue_status = QueueStatus.AWAITING_RECOVERY
+                    self._state.recovery_target_command_id = action.command_id
                 elif action.type == ErrorRecoveryType.FAIL_RUN:
                     other_command_ids_to_fail = (
                         self._state.command_history.get_queue_ids()
@@ -335,6 +340,7 @@ def handle_action(self, action: Action) -> None:  # noqa: C901
 
         elif isinstance(action, ResumeFromRecoveryAction):
             self._state.queue_status = QueueStatus.RUNNING
+            self._state.recovery_target_command_id = None
 
         elif isinstance(action, StopAction):
             if not self._state.run_result:
@@ -710,12 +716,7 @@ def get_all_commands_final(self) -> bool:
 
     def get_recovery_in_progress_for_command(self, command_id: str) -> bool:
         """Return whether the given command failed and its error recovery is in progress."""
-        command_is_most_recent_to_fail = (
-            self._state.failed_command is not None
-            and command_id == self._state.failed_command.command.id
-        )
-        recovery_is_ongoing = self.get_status() == EngineStatus.AWAITING_RECOVERY
-        return command_is_most_recent_to_fail and recovery_is_ongoing
+        return self._state.recovery_target_command_id == command_id
 
     def raise_fatal_command_error(self) -> None:
         """Raise the run's fatal command error, if there was one, as an exception.

diff --git a/api/tests/opentrons/protocol_engine/state/test_command_state.py b/api/tests/opentrons/protocol_engine/state/test_command_state.py
@@ -348,3 +348,89 @@ def test_error_recovery_type_tracking() -> None:
     view = CommandView(subject.state)
     assert view.get_error_recovery_type("c1") == ErrorRecoveryType.WAIT_FOR_RECOVERY
     assert view.get_error_recovery_type("c2") == ErrorRecoveryType.FAIL_RUN
+
+
+def test_get_recovery_in_progress_for_command() -> None:
+    """It should return whether error recovery is in progress for the given command."""
+    subject = CommandStore(config=_make_config(), is_door_open=False)
+    subject_view = CommandView(subject.state)
+
+    queue_1 = actions.QueueCommandAction(
+        "c1",
+        created_at=datetime.now(),
+        request=commands.CommentCreate(params=commands.CommentParams(message="")),
+        request_hash=None,
+    )
+    subject.handle_action(queue_1)
+    run_1 = actions.RunCommandAction(command_id="c1", started_at=datetime.now())
+    subject.handle_action(run_1)
+    fail_1 = actions.FailCommandAction(
+        command_id="c1",
+        error_id="c1-error",
+        failed_at=datetime.now(),
+        error=PythonException(RuntimeError()),
+        notes=[],
+        type=ErrorRecoveryType.WAIT_FOR_RECOVERY,
+        running_command=subject_view.get("c1"),
+    )
+    subject.handle_action(fail_1)
+
+    # c1 failed recoverably and we're currently recovering from it.
+    assert subject_view.get_recovery_in_progress_for_command("c1")
+
+    resume_from_1_recovery = actions.ResumeFromRecoveryAction()
+    subject.handle_action(resume_from_1_recovery)
+
+    # c1 failed recoverably, but we've already completed its recovery.
+    assert not subject_view.get_recovery_in_progress_for_command("c1")
+
+    queue_2 = actions.QueueCommandAction(
+        "c2",
+        created_at=datetime.now(),
+        request=commands.CommentCreate(params=commands.CommentParams(message="")),
+        request_hash=None,
+    )
+    subject.handle_action(queue_2)
+    run_2 = actions.RunCommandAction(command_id="c2", started_at=datetime.now())
+    subject.handle_action(run_2)
+    fail_2 = actions.FailCommandAction(
+        command_id="c2",
+        error_id="c2-error",
+        failed_at=datetime.now(),
+        error=PythonException(RuntimeError()),
+        notes=[],
+        type=ErrorRecoveryType.WAIT_FOR_RECOVERY,
+        running_command=subject_view.get("c2"),
+    )
+    subject.handle_action(fail_2)
+
+    # c2 failed recoverably and we're currently recovering from it.
+    assert subject_view.get_recovery_in_progress_for_command("c2")
+    # ...and that means we're *not* currently recovering from c1,
+    # even though it failed recoverably before.
+    assert not subject_view.get_recovery_in_progress_for_command("c1")
+
+    resume_from_2_recovery = actions.ResumeFromRecoveryAction()
+    subject.handle_action(resume_from_2_recovery)
+    queue_3 = actions.QueueCommandAction(
+        "c3",
+        created_at=datetime.now(),
+        request=commands.CommentCreate(params=commands.CommentParams(message="")),
+        request_hash=None,
+    )
+    subject.handle_action(queue_3)
+    run_3 = actions.RunCommandAction(command_id="c3", started_at=datetime.now())
+    subject.handle_action(run_3)
+    fail_3 = actions.FailCommandAction(
+        command_id="c3",
+        error_id="c3-error",
+        failed_at=datetime.now(),
+        error=PythonException(RuntimeError()),
+        notes=[],
+        type=ErrorRecoveryType.FAIL_RUN,
+        running_command=subject_view.get("c3"),
+    )
+    subject.handle_action(fail_3)
+
+    # c3 failed, but not recoverably.
+    assert not subject_view.get_recovery_in_progress_for_command("c2")
diff --git a/api/tests/opentrons/protocol_engine/state/test_command_store_old.py b/api/tests/opentrons/protocol_engine/state/test_command_store_old.py
@@ -83,6 +83,7 @@ def test_initial_state(
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         latest_command_hash=None,
         stopped_by_estop=False,
     )
@@ -516,6 +517,7 @@ def test_command_store_handles_pause_action(pause_source: PauseSource) -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         latest_command_hash=None,
         stopped_by_estop=False,
     )
@@ -541,6 +543,7 @@ def test_command_store_handles_play_action(pause_source: PauseSource) -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=datetime(year=2021, month=1, day=1),
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -572,6 +575,7 @@ def test_command_store_handles_finish_action() -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=datetime(year=2021, month=1, day=1),
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -618,6 +622,7 @@ def test_command_store_handles_stop_action(from_estop: bool) -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=datetime(year=2021, month=1, day=1),
         latest_command_hash=None,
         stopped_by_estop=from_estop,
@@ -648,6 +653,7 @@ def test_command_store_cannot_restart_after_should_stop() -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=None,
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -780,6 +786,7 @@ def test_command_store_wraps_unknown_errors() -> None:
         run_started_at=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         latest_command_hash=None,
         stopped_by_estop=False,
     )
@@ -841,6 +848,7 @@ def __init__(self, message: str) -> None:
         ),
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=None,
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -873,6 +881,7 @@ def test_command_store_ignores_stop_after_graceful_finish() -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=datetime(year=2021, month=1, day=1),
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -905,6 +914,7 @@ def test_command_store_ignores_finish_after_non_graceful_stop() -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=datetime(year=2021, month=1, day=1),
         latest_command_hash=None,
         stopped_by_estop=False,
@@ -933,6 +943,7 @@ def test_handles_hardware_stopped() -> None:
         finish_error=None,
         failed_command=None,
         command_error_recovery_types={},
+        recovery_target_command_id=None,
         run_started_at=None,
         latest_command_hash=None,
         stopped_by_estop=False,

diff --git a/api/tests/opentrons/protocol_engine/state/test_command_view_old.py b/api/tests/opentrons/protocol_engine/state/test_command_view_old.py
@@ -58,6 +58,7 @@ def get_command_view(
     run_error: Optional[errors.ErrorOccurrence] = None,
     failed_command: Optional[CommandEntry] = None,
     command_error_recovery_types: Optional[Dict[str, ErrorRecoveryType]] = None,
+    recovery_target_command_id: Optional[str] = None,
     finish_error: Optional[errors.ErrorOccurrence] = None,
     commands: Sequence[cmd.Command] = (),
     latest_command_hash: Optional[str] = None,
@@ -90,6 +91,7 @@ def get_command_view(
         finish_error=finish_error,
         failed_command=failed_command,
         command_error_recovery_types=command_error_recovery_types or {},
+        recovery_target_command_id=recovery_target_command_id,
         run_started_at=run_started_at,
         latest_command_hash=latest_command_hash,
         stopped_by_estop=False,
@@ -265,40 +267,6 @@ def test_get_all_commands_final() -> None:
     assert subject.get_all_commands_final() is False
 
 
-def test_get_recovery_in_progress_for_command() -> None:
-    """It should return whether error recovery is in progress for the given command."""
-    # command-id is not a failed command:
-    subject = get_command_view()
-    assert not subject.get_recovery_in_progress_for_command("command-id")
-
-    # command-id is a failed command and we ARE recovering from it:
-    subject = get_command_view(
-        failed_command=CommandEntry(
-            create_failed_command(command_id="command-id"), index=0
-        ),
-        queue_status=QueueStatus.AWAITING_RECOVERY,
-    )
-    assert subject.get_recovery_in_progress_for_command("command-id")
-
-    # command-id is a failed command but we're not recovering from any command:
-    subject = get_command_view(
-        failed_command=CommandEntry(
-            create_failed_command(command_id="command-id"), index=0
-        ),
-        queue_status=QueueStatus.PAUSED,
-    )
-    assert not subject.get_recovery_in_progress_for_command("command-id")
-
-    # command-id is a failed command, but we're recovering from a different one:
-    subject = get_command_view(
-        failed_command=CommandEntry(
-            create_failed_command(command_id="different-command-id"), index=0
-        ),
-        queue_status=QueueStatus.AWAITING_RECOVERY,
-    )
-    assert not subject.get_recovery_in_progress_for_command("command-id")
-
-
 def test_raise_fatal_command_error() -> None:
     """It should raise the fatal command error."""
     completed_command = create_succeeded_command(command_id="command-id-1")