From 244a80e5985cf608e7b89013e36bef2337f71619 Mon Sep 17 00:00:00 2001
From: Max Marrone <max@opentrons.com>
Date: Mon, 25 Mar 2024 18:37:46 -0400
Subject: [PATCH] fix(robot-server): Update tests to properly check new bad run
 records (#14723)

# Overview

Follow-ups for
https://github.com/Opentrons/opentrons/pull/14711#discussion_r1534599540.

#14711 added safer error propagation for when robot-server encounters
bad stored run data. As part of that, if it finds a run where the
`state_summary` SQL column is `NULL`, it treats that as bad data and
propagates the error to HTTP clients.

When you restart the robot while there is an active run, then no state
summary will be inserted (this only happens when the run is ended and
the state moves from engine to sql) and the run will be bad.

We say that this is in fact a bad run because to the client, there is no
distinction between state summary and run. A run with an empty state
summary does not have correct data and does not represent what occurred.

Add a regression test to make sure this is how we handle runs that did
not have state summaries persisted.

# Testing
- [x] create a run with this branch on a flex and restart the flex (or
kill the robot server process - this isn't about the details of when
things are written to disk, just the lifetime of the data here) and see
that the run is now bad

---------

Co-authored-by: Seth Foster <seth@opentrons.com>
---
 robot-server/robot_server/runs/run_models.py  |   2 +-
 .../persistence/test_compatibility.py         | 192 +++++++++++-------
 .../http_api/runs/test_persistence.py         |  49 ++++-
 3 files changed, 157 insertions(+), 86 deletions(-)

diff --git a/robot-server/robot_server/runs/run_models.py b/robot-server/robot_server/runs/run_models.py
index 379feeaea0a..e05cd25330c 100644
--- a/robot-server/robot_server/runs/run_models.py
+++ b/robot-server/robot_server/runs/run_models.py
@@ -28,7 +28,7 @@ class RunDataError(ErrorDetails):
     """A model for an error loading a run."""
 
     title: str = Field(
-        "Run Loading Error",
+        "Run Data Error",
         description="A short, human readable name for this type of error",
     )
     id: Literal["RunDataError"] = "RunDataError"
diff --git a/robot-server/tests/integration/http_api/persistence/test_compatibility.py b/robot-server/tests/integration/http_api/persistence/test_compatibility.py
index 33aa5a6cfd9..0e487585bed 100644
--- a/robot-server/tests/integration/http_api/persistence/test_compatibility.py
+++ b/robot-server/tests/integration/http_api/persistence/test_compatibility.py
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from dataclasses import dataclass, field
 from pathlib import Path
 from shutil import copytree
 from tempfile import TemporaryDirectory
-from typing import List
+from typing import List, Literal, Union, TYPE_CHECKING
 
 import anyio
 import pytest
@@ -12,6 +14,9 @@
 
 from .persistence_snapshots_dir import PERSISTENCE_SNAPSHOTS_DIR
 
+if TYPE_CHECKING:
+    from _pytest.mark import ParameterSet
+
 # Allow plenty of time for database migrations, which can take a while in our CI runners.
 _STARTUP_TIMEOUT = 60
 
@@ -28,6 +33,14 @@
 class Run:
     id: str
     expected_command_count: int
+    ok: Literal[True] = True
+
+
+@dataclass
+class BadRun:
+    id: str
+    expected_command_count: int
+    ok: Literal[False] = False
 
 
 @dataclass
@@ -36,7 +49,7 @@ class Snapshot:
 
     version: str
     expected_protocol_count: int
-    expected_runs: List[Run]
+    expected_runs: List[Union[Run, BadRun]]
     protocols_with_no_analyses: List[str] = field(default_factory=list)
 
     def get_copy(self) -> Path:
@@ -58,86 +71,103 @@ def get_copy(self) -> Path:
 )
 
 
-snapshots: List[(Snapshot)] = [
-    Snapshot(
-        version="v6.0.1",
-        expected_protocol_count=4,
-        expected_runs=[
-            Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
-            Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
-            Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
-            Run("467761f3-7339-4b8d-9007-4482500657da", 65),
-            Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
-        ],
+snapshots: List[ParameterSet] = [
+    pytest.param(
+        Snapshot(
+            version="v6.0.1",
+            expected_protocol_count=4,
+            expected_runs=[
+                Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
+                Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
+                Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
+                Run("467761f3-7339-4b8d-9007-4482500657da", 65),
+                Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
+            ],
+        ),
+        id="v6.0.1",
     ),
-    Snapshot(
-        version="v6.1.0",
-        expected_protocol_count=2,
-        expected_runs=[
-            Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
-            Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.1.0",
+            expected_protocol_count=2,
+            expected_runs=[
+                Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
+                Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
+            ],
+        ),
+        id="v6.0.1",
     ),
-    Snapshot(
-        version="v6.2.0",
-        expected_protocol_count=2,
-        expected_runs=[
-            Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
-            Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.2.0",
+            expected_protocol_count=2,
+            expected_runs=[
+                Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
+                Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
+            ],
+        ),
+        id="v6.2.0",
     ),
-    Snapshot(
-        version="v6.2.0_large",
-        expected_protocol_count=17,
-        expected_runs=[
-            Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
-            Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
-            Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
-            Run("0b97477c-844d-406a-87e8-0852421d7212", 0),
-            Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
-            Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
-            Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
-            Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
-            Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
-            Run("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
-            Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
-            Run("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
-            Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
-            Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
-            Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
-            Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
-        ],
-        protocols_with_no_analyses=[
-            "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
-            "e3515d46-3c3b-425b-8734-bd6e38d6a729",
-        ],
+    pytest.param(
+        Snapshot(
+            version="v6.2.0_large",
+            expected_protocol_count=17,
+            expected_runs=[
+                Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
+                Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
+                Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
+                BadRun("0b97477c-844d-406a-87e8-0852421d7212", 0),
+                Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
+                Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
+                Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
+                Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
+                Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
+                BadRun("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
+                # This is a run of a protocol with no commands
+                Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
+                BadRun("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
+                # Stopped early
+                Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
+                Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
+                Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
+                Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
+            ],
+            protocols_with_no_analyses=[
+                "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
+                "e3515d46-3c3b-425b-8734-bd6e38d6a729",
+            ],
+        ),
+        id="v6.2.0_large",
     ),
-    flex_dev_compat_snapshot,
-    Snapshot(
-        version="v7.1.1",
-        expected_protocol_count=10,
-        expected_runs=[
-            Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
-            Run("04ec9eda-19b2-4850-9148-d28112565b37", 0),
-            Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
-            Run("4f623a64-20ce-464b-a118-e8a785911613", 0),
-            Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
-            Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
-            Run("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
-            Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
-            Run("62011896-29f5-40b4-83ee-29d7b7817583", 0),
-            Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
-            Run("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
-            Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
-            Run("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
-            Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
-            Run("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
-            Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
-            Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
-            Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
-            Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
-            Run("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
-        ],
+    pytest.param(flex_dev_compat_snapshot, id="flex_dev_compat"),
+    pytest.param(
+        Snapshot(
+            version="v7.1.1",
+            expected_protocol_count=10,
+            expected_runs=[
+                Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
+                BadRun("04ec9eda-19b2-4850-9148-d28112565b37", 0),
+                Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
+                BadRun("4f623a64-20ce-464b-a118-e8a785911613", 0),
+                Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
+                Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
+                BadRun("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
+                Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
+                BadRun("62011896-29f5-40b4-83ee-29d7b7817583", 0),
+                Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
+                BadRun("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
+                Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
+                BadRun("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
+                Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
+                BadRun("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
+                Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
+                Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
+                Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
+                Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
+                BadRun("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
+            ],
+        ),
+        id="v7.1.1",
     ),
 ]
 
@@ -198,7 +228,11 @@ async def test_protocols_analyses_and_runs_available_from_older_persistence_dir(
             assert all_run_ids == [r.id for r in snapshot.expected_runs]
 
             for expected_run in snapshot.expected_runs:
-                await robot_client.get_run(run_id=expected_run.id)
+                run = (await robot_client.get_run(run_id=expected_run.id)).json()
+                if expected_run.ok:
+                    assert run["data"].get("dataError") is None
+                else:
+                    assert run["data"].get("dataError") is not None
 
                 all_command_summaries = (
                     await robot_client.get_run_commands(
diff --git a/robot-server/tests/integration/http_api/runs/test_persistence.py b/robot-server/tests/integration/http_api/runs/test_persistence.py
index a8a46546108..45b55202fda 100644
--- a/robot-server/tests/integration/http_api/runs/test_persistence.py
+++ b/robot-server/tests/integration/http_api/runs/test_persistence.py
@@ -1,5 +1,6 @@
-from typing import Any, AsyncGenerator, Dict, NamedTuple, cast
+from copy import deepcopy
 from datetime import datetime
+from typing import Any, AsyncGenerator, Dict, NamedTuple, cast
 
 import anyio
 import pytest
@@ -73,7 +74,46 @@ async def _assert_run_persisted(
     assert get_persisted_run_response.json()["data"] == expected_run_data
 
 
-async def test_runs_persist(client_and_server: ClientServerFixture) -> None:
+async def test_untimely_restart_marks_runs_bad(
+    client_and_server: ClientServerFixture,
+) -> None:
+    """Test that a run persists even if the server was restarted before the run was
+    gracefully closed out."""
+    client, server = client_and_server
+
+    # create a run
+    create_run_response = await client.post_run(req_body={"data": {}})
+    run_id = create_run_response.json()["data"]["id"]
+
+    run = (await client.get_run(run_id)).json()["data"]
+    assert run["status"] == "idle"
+    assert run["current"] is True
+    # Some loss of state is expected.
+    expected_run = deepcopy(run)
+    expected_run["status"] = "stopped"
+    expected_run["current"] = False
+    expected_run["ok"] = False
+    expected_run["dataError"] = {
+        "id": "RunDataError",
+        "title": "Run Data Error",
+        "detail": "There was no engine state data for this run.",
+        "meta": {
+            "code": "4008",
+            "detail": {},
+            "message": "There was no engine state data for this run.",
+            "type": "InvalidStoredData",
+            "wrapping": [],
+        },
+        "errorCode": "4008",
+    }
+
+    # reboot the server
+    await client_and_server.restart()
+
+    await _assert_run_persisted(robot_client=client, expected_run_data=expected_run)
+
+
+async def test_runs_persist_via_patch(client_and_server: ClientServerFixture) -> None:
     """Test that runs are persisted through dev server restart."""
     client, server = client_and_server
 
@@ -100,10 +140,7 @@ async def test_runs_persist_via_actions_router(
     """Test that runs commands and state
     are persisted when calling play action through dev server restart."""
     client, server = client_and_server
-    # await client.post_protocol([Path("./tests/integration/protocols/simple.py")])
-    #
-    # protocols = (await client.get_protocols()).json()["data"]
-    # protocol_id = protocols[0]["id"]
+
     # create a run
     create_run_response = await client.post_run(req_body={"data": {}})
     run_id = create_run_response.json()["data"]["id"]