fix(robot-server): Update tests to properly check new bad run records (…

…#14723) # Overview Follow-ups for #14711 (comment). #14711 added safer error propagation for when robot-server encounters bad stored run data. As part of that, if it finds a run where the `state_summary` SQL column is `NULL`, it treats that as bad data and propagates the error to HTTP clients. When you restart the robot while there is an active run, then no state summary will be inserted (this only happens when the run is ended and the state moves from engine to sql) and the run will be bad. We say that this is in fact a bad run because to the client, there is no distinction between state summary and run. A run with an empty state summary does not have correct data and does not represent what occurred. Add a regression test to make sure this is how we handle runs that did not have state summaries persisted. # Testing - [x] create a run with this branch on a flex and restart the flex (or kill the robot server process - this isn't about the details of when things are written to disk, just the lifetime of the data here) and see that the run is now bad --------- Co-authored-by: Seth Foster <[email protected]>
Opentrons · Mar 25, 2024 · 244a80e · 244a80e
1 parent 29414e2
commit 244a80e
Show file tree

Hide file tree

Showing 3 changed files with 157 additions and 86 deletions.
diff --git a/robot-server/robot_server/runs/run_models.py b/robot-server/robot_server/runs/run_models.py
@@ -28,7 +28,7 @@ class RunDataError(ErrorDetails):
     """A model for an error loading a run."""
 
     title: str = Field(
-        "Run Loading Error",
+        "Run Data Error",
         description="A short, human readable name for this type of error",
     )
     id: Literal["RunDataError"] = "RunDataError"

diff --git a/robot-server/tests/integration/http_api/persistence/test_compatibility.py b/robot-server/tests/integration/http_api/persistence/test_compatibility.py
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from dataclasses import dataclass, field
 from pathlib import Path
 from shutil import copytree
 from tempfile import TemporaryDirectory
-from typing import List
+from typing import List, Literal, Union, TYPE_CHECKING
 
 import anyio
 import pytest
@@ -12,6 +14,9 @@
 
 from .persistence_snapshots_dir import PERSISTENCE_SNAPSHOTS_DIR
 
+if TYPE_CHECKING:
+    from _pytest.mark import ParameterSet
+
 # Allow plenty of time for database migrations, which can take a while in our CI runners.
 _STARTUP_TIMEOUT = 60
 
@@ -28,6 +33,14 @@
 class Run:
     id: str
     expected_command_count: int
+    ok: Literal[True] = True
+
+
+@dataclass
+class BadRun:
+    id: str
+    expected_command_count: int
+    ok: Literal[False] = False
 
 
 @dataclass
@@ -36,7 +49,7 @@ class Snapshot:
 
     version: str
     expected_protocol_count: int
-    expected_runs: List[Run]
+    expected_runs: List[Union[Run, BadRun]]
     protocols_with_no_analyses: List[str] = field(default_factory=list)
 
     def get_copy(self) -> Path:
@@ -58,86 +71,103 @@ def get_copy(self) -> Path:
 )
 
 
-snapshots: List[(Snapshot)] = [
-    Snapshot(
-        version="v6.0.1",
-        expected_protocol_count=4,
-        expected_runs=[
-            Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
-            Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
-            Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
-            Run("467761f3-7339-4b8d-9007-4482500657da", 65),
-            Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
-        ],
+snapshots: List[ParameterSet] = [
+    pytest.param(
+        Snapshot(
+            version="v6.0.1",
+            expected_protocol_count=4,
+            expected_runs=[
+                Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
+                Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
+                Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
+                Run("467761f3-7339-4b8d-9007-4482500657da", 65),
+                Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
+            ],
+        ),
+        id="v6.0.1",
     ),
-    Snapshot(
-        version="v6.1.0",
-        expected_protocol_count=2,
-        expected_runs=[
-            Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
-            Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.1.0",
+            expected_protocol_count=2,
+            expected_runs=[
+                Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
+                Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
+            ],
+        ),
+        id="v6.0.1",
     ),
-    Snapshot(
-        version="v6.2.0",
-        expected_protocol_count=2,
-        expected_runs=[
-            Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
-            Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.2.0",
+            expected_protocol_count=2,
+            expected_runs=[
+                Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
+                Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
+            ],
+        ),
+        id="v6.2.0",
     ),
-    Snapshot(
-        version="v6.2.0_large",
-        expected_protocol_count=17,
-        expected_runs=[
-            Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
-            Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
-            Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
-            Run("0b97477c-844d-406a-87e8-0852421d7212", 0),
-            Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
-            Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
-            Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
-            Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
-            Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
-            Run("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
-            Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
-            Run("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
-            Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
-            Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
-            Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
-            Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
-        ],
-        protocols_with_no_analyses=[
-            "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
-            "e3515d46-3c3b-425b-8734-bd6e38d6a729",
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.2.0_large",
+            expected_protocol_count=17,
+            expected_runs=[
+                Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
+                Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
+                Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
+                BadRun("0b97477c-844d-406a-87e8-0852421d7212", 0),
+                Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
+                Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
+                Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
+                Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
+                Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
+                BadRun("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
+                # This is a run of a protocol with no commands
+                Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
+                BadRun("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
+                # Stopped early
+                Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
+                Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
+                Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
+                Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
+            ],
+            protocols_with_no_analyses=[
+                "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
+                "e3515d46-3c3b-425b-8734-bd6e38d6a729",
+            ],
+        ),
+        id="v6.2.0_large",
     ),
-    flex_dev_compat_snapshot,
-    Snapshot(
-        version="v7.1.1",
-        expected_protocol_count=10,
-        expected_runs=[
-            Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
-            Run("04ec9eda-19b2-4850-9148-d28112565b37", 0),
-            Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
-            Run("4f623a64-20ce-464b-a118-e8a785911613", 0),
-            Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
-            Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
-            Run("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
-            Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
-            Run("62011896-29f5-40b4-83ee-29d7b7817583", 0),
-            Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
-            Run("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
-            Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
-            Run("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
-            Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
-            Run("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
-            Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
-            Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
-            Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
-            Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
-            Run("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
-        ],
+    pytest.param(flex_dev_compat_snapshot, id="flex_dev_compat"),
+    pytest.param(
+        Snapshot(
+            version="v7.1.1",
+            expected_protocol_count=10,
+            expected_runs=[
+                Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
+                BadRun("04ec9eda-19b2-4850-9148-d28112565b37", 0),
+                Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
+                BadRun("4f623a64-20ce-464b-a118-e8a785911613", 0),
+                Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
+                Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
+                BadRun("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
+                Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
+                BadRun("62011896-29f5-40b4-83ee-29d7b7817583", 0),
+                Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
+                BadRun("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
+                Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
+                BadRun("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
+                Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
+                BadRun("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
+                Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
+                Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
+                Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
+                Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
+                BadRun("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
+            ],
+        ),
+        id="v7.1.1",
     ),
 ]
 
@@ -198,7 +228,11 @@ async def test_protocols_analyses_and_runs_available_from_older_persistence_dir(
             assert all_run_ids == [r.id for r in snapshot.expected_runs]
 
             for expected_run in snapshot.expected_runs:
-                await robot_client.get_run(run_id=expected_run.id)
+                run = (await robot_client.get_run(run_id=expected_run.id)).json()
+                if expected_run.ok:
+                    assert run["data"].get("dataError") is None
+                else:
+                    assert run["data"].get("dataError") is not None
 
                 all_command_summaries = (
                     await robot_client.get_run_commands(

diff --git a/robot-server/tests/integration/http_api/runs/test_persistence.py b/robot-server/tests/integration/http_api/runs/test_persistence.py
@@ -1,5 +1,6 @@
-from typing import Any, AsyncGenerator, Dict, NamedTuple, cast
+from copy import deepcopy
 from datetime import datetime
+from typing import Any, AsyncGenerator, Dict, NamedTuple, cast
 
 import anyio
 import pytest
@@ -73,7 +74,46 @@ async def _assert_run_persisted(
     assert get_persisted_run_response.json()["data"] == expected_run_data
 
 
-async def test_runs_persist(client_and_server: ClientServerFixture) -> None:
+async def test_untimely_restart_marks_runs_bad(
+    client_and_server: ClientServerFixture,
+) -> None:
+    """Test that a run persists even if the server was restarted before the run was
+    gracefully closed out."""
+    client, server = client_and_server
+
+    # create a run
+    create_run_response = await client.post_run(req_body={"data": {}})
+    run_id = create_run_response.json()["data"]["id"]
+
+    run = (await client.get_run(run_id)).json()["data"]
+    assert run["status"] == "idle"
+    assert run["current"] is True
+    # Some loss of state is expected.
+    expected_run = deepcopy(run)
+    expected_run["status"] = "stopped"
+    expected_run["current"] = False
+    expected_run["ok"] = False
+    expected_run["dataError"] = {
+        "id": "RunDataError",
+        "title": "Run Data Error",
+        "detail": "There was no engine state data for this run.",
+        "meta": {
+            "code": "4008",
+            "detail": {},
+            "message": "There was no engine state data for this run.",
+            "type": "InvalidStoredData",
+            "wrapping": [],
+        },
+        "errorCode": "4008",
+    }
+
+    # reboot the server
+    await client_and_server.restart()
+
+    await _assert_run_persisted(robot_client=client, expected_run_data=expected_run)
+
+
+async def test_runs_persist_via_patch(client_and_server: ClientServerFixture) -> None:
     """Test that runs are persisted through dev server restart."""
     client, server = client_and_server
 
@@ -100,10 +140,7 @@ async def test_runs_persist_via_actions_router(
     """Test that runs commands and state
     are persisted when calling play action through dev server restart."""
     client, server = client_and_server
-    # await client.post_protocol([Path("./tests/integration/protocols/simple.py")])
-    #
-    # protocols = (await client.get_protocols()).json()["data"]
-    # protocol_id = protocols[0]["id"]
+
     # create a run
     create_run_response = await client.post_run(req_body={"data": {}})
     run_id = create_run_response.json()["data"]["id"]