From 244a80e5985cf608e7b89013e36bef2337f71619 Mon Sep 17 00:00:00 2001 From: Max Marrone Date: Mon, 25 Mar 2024 18:37:46 -0400 Subject: [PATCH] fix(robot-server): Update tests to properly check new bad run records (#14723) # Overview Follow-ups for https://github.com/Opentrons/opentrons/pull/14711#discussion_r1534599540. #14711 added safer error propagation for when robot-server encounters bad stored run data. As part of that, if it finds a run where the `state_summary` SQL column is `NULL`, it treats that as bad data and propagates the error to HTTP clients. When you restart the robot while there is an active run, then no state summary will be inserted (this only happens when the run is ended and the state moves from engine to sql) and the run will be bad. We say that this is in fact a bad run because to the client, there is no distinction between state summary and run. A run with an empty state summary does not have correct data and does not represent what occurred. Add a regression test to make sure this is how we handle runs that did not have state summaries persisted. # Testing - [x] create a run with this branch on a flex and restart the flex (or kill the robot server process - this isn't about the details of when things are written to disk, just the lifetime of the data here) and see that the run is now bad --------- Co-authored-by: Seth Foster --- robot-server/robot_server/runs/run_models.py | 2 +- .../persistence/test_compatibility.py | 192 +++++++++++------- .../http_api/runs/test_persistence.py | 49 ++++- 3 files changed, 157 insertions(+), 86 deletions(-) diff --git a/robot-server/robot_server/runs/run_models.py b/robot-server/robot_server/runs/run_models.py index 379feeaea0a..e05cd25330c 100644 --- a/robot-server/robot_server/runs/run_models.py +++ b/robot-server/robot_server/runs/run_models.py @@ -28,7 +28,7 @@ class RunDataError(ErrorDetails): """A model for an error loading a run.""" title: str = Field( - "Run Loading Error", + "Run Data Error", description="A short, human readable name for this type of error", ) id: Literal["RunDataError"] = "RunDataError" diff --git a/robot-server/tests/integration/http_api/persistence/test_compatibility.py b/robot-server/tests/integration/http_api/persistence/test_compatibility.py index 33aa5a6cfd9..0e487585bed 100644 --- a/robot-server/tests/integration/http_api/persistence/test_compatibility.py +++ b/robot-server/tests/integration/http_api/persistence/test_compatibility.py @@ -1,8 +1,10 @@ +from __future__ import annotations + from dataclasses import dataclass, field from pathlib import Path from shutil import copytree from tempfile import TemporaryDirectory -from typing import List +from typing import List, Literal, Union, TYPE_CHECKING import anyio import pytest @@ -12,6 +14,9 @@ from .persistence_snapshots_dir import PERSISTENCE_SNAPSHOTS_DIR +if TYPE_CHECKING: + from _pytest.mark import ParameterSet + # Allow plenty of time for database migrations, which can take a while in our CI runners. _STARTUP_TIMEOUT = 60 @@ -28,6 +33,14 @@ class Run: id: str expected_command_count: int + ok: Literal[True] = True + + +@dataclass +class BadRun: + id: str + expected_command_count: int + ok: Literal[False] = False @dataclass @@ -36,7 +49,7 @@ class Snapshot: version: str expected_protocol_count: int - expected_runs: List[Run] + expected_runs: List[Union[Run, BadRun]] protocols_with_no_analyses: List[str] = field(default_factory=list) def get_copy(self) -> Path: @@ -58,86 +71,103 @@ def get_copy(self) -> Path: ) -snapshots: List[(Snapshot)] = [ - Snapshot( - version="v6.0.1", - expected_protocol_count=4, - expected_runs=[ - Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23), - Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16), - Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65), - Run("467761f3-7339-4b8d-9007-4482500657da", 65), - Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333), - ], +snapshots: List[ParameterSet] = [ + pytest.param( + Snapshot( + version="v6.0.1", + expected_protocol_count=4, + expected_runs=[ + Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23), + Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16), + Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65), + Run("467761f3-7339-4b8d-9007-4482500657da", 65), + Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333), + ], + ), + id="v6.0.1", ), - Snapshot( - version="v6.1.0", - expected_protocol_count=2, - expected_runs=[ - Run("a4338d46-96af-4e23-877d-1d79227a0946", 147), - Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205), - ], + pytest.param( + Snapshot( + version="v6.1.0", + expected_protocol_count=2, + expected_runs=[ + Run("a4338d46-96af-4e23-877d-1d79227a0946", 147), + Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205), + ], + ), + id="v6.0.1", ), - Snapshot( - version="v6.2.0", - expected_protocol_count=2, - expected_runs=[ - Run("199b991d-db3c-49ff-9b4f-905118c10685", 125), - Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87), - ], + pytest.param( + Snapshot( + version="v6.2.0", + expected_protocol_count=2, + expected_runs=[ + Run("199b991d-db3c-49ff-9b4f-905118c10685", 125), + Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87), + ], + ), + id="v6.2.0", ), - Snapshot( - version="v6.2.0_large", - expected_protocol_count=17, - expected_runs=[ - Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218), - Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125), - Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185), - Run("0b97477c-844d-406a-87e8-0852421d7212", 0), - Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120), - Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8), - Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13), - Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11), - Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15), - Run("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0), - Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0), - Run("5f30a0dd-e4da-4f24-abce-7468067d883a", 0), - Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0), - Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96), - Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0), - Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220), - ], - protocols_with_no_analyses=[ - "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80", - "e3515d46-3c3b-425b-8734-bd6e38d6a729", - ], + pytest.param( + Snapshot( + version="v6.2.0_large", + expected_protocol_count=17, + expected_runs=[ + Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218), + Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125), + Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185), + BadRun("0b97477c-844d-406a-87e8-0852421d7212", 0), + Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120), + Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8), + Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13), + Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11), + Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15), + BadRun("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0), + # This is a run of a protocol with no commands + Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0), + BadRun("5f30a0dd-e4da-4f24-abce-7468067d883a", 0), + # Stopped early + Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0), + Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96), + Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0), + Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220), + ], + protocols_with_no_analyses=[ + "429e72e1-6ff1-4328-8a1d-c13fe3ac0c80", + "e3515d46-3c3b-425b-8734-bd6e38d6a729", + ], + ), + id="v6.2.0_large", ), - flex_dev_compat_snapshot, - Snapshot( - version="v7.1.1", - expected_protocol_count=10, - expected_runs=[ - Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634), - Run("04ec9eda-19b2-4850-9148-d28112565b37", 0), - Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787), - Run("4f623a64-20ce-464b-a118-e8a785911613", 0), - Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953), - Run("59706cac-74d5-4542-8b38-499d11ad352e", 54), - Run("ef7794a5-3afd-438d-a69e-34138d9ae520", 0), - Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359), - Run("62011896-29f5-40b4-83ee-29d7b7817583", 0), - Run("790d551d-68f0-4513-8896-bc175f629546", 1541), - Run("92dafa40-3425-4a74-9d20-a3fc08365a92", 0), - Run("7622aed6-08bf-4339-accc-952dcad310ce", 205), - Run("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0), - Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609), - Run("22d99b67-3062-48ed-80bd-4505def1bb7d", 0), - Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18), - Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679), - Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183), - Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467), - Run("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0), - ], + pytest.param(flex_dev_compat_snapshot, id="flex_dev_compat"), + pytest.param( + Snapshot( + version="v7.1.1", + expected_protocol_count=10, + expected_runs=[ + Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634), + BadRun("04ec9eda-19b2-4850-9148-d28112565b37", 0), + Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787), + BadRun("4f623a64-20ce-464b-a118-e8a785911613", 0), + Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953), + Run("59706cac-74d5-4542-8b38-499d11ad352e", 54), + BadRun("ef7794a5-3afd-438d-a69e-34138d9ae520", 0), + Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359), + BadRun("62011896-29f5-40b4-83ee-29d7b7817583", 0), + Run("790d551d-68f0-4513-8896-bc175f629546", 1541), + BadRun("92dafa40-3425-4a74-9d20-a3fc08365a92", 0), + Run("7622aed6-08bf-4339-accc-952dcad310ce", 205), + BadRun("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0), + Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609), + BadRun("22d99b67-3062-48ed-80bd-4505def1bb7d", 0), + Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18), + Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679), + Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183), + Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467), + BadRun("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0), + ], + ), + id="v7.1.1", ), ] @@ -198,7 +228,11 @@ async def test_protocols_analyses_and_runs_available_from_older_persistence_dir( assert all_run_ids == [r.id for r in snapshot.expected_runs] for expected_run in snapshot.expected_runs: - await robot_client.get_run(run_id=expected_run.id) + run = (await robot_client.get_run(run_id=expected_run.id)).json() + if expected_run.ok: + assert run["data"].get("dataError") is None + else: + assert run["data"].get("dataError") is not None all_command_summaries = ( await robot_client.get_run_commands( diff --git a/robot-server/tests/integration/http_api/runs/test_persistence.py b/robot-server/tests/integration/http_api/runs/test_persistence.py index a8a46546108..45b55202fda 100644 --- a/robot-server/tests/integration/http_api/runs/test_persistence.py +++ b/robot-server/tests/integration/http_api/runs/test_persistence.py @@ -1,5 +1,6 @@ -from typing import Any, AsyncGenerator, Dict, NamedTuple, cast +from copy import deepcopy from datetime import datetime +from typing import Any, AsyncGenerator, Dict, NamedTuple, cast import anyio import pytest @@ -73,7 +74,46 @@ async def _assert_run_persisted( assert get_persisted_run_response.json()["data"] == expected_run_data -async def test_runs_persist(client_and_server: ClientServerFixture) -> None: +async def test_untimely_restart_marks_runs_bad( + client_and_server: ClientServerFixture, +) -> None: + """Test that a run persists even if the server was restarted before the run was + gracefully closed out.""" + client, server = client_and_server + + # create a run + create_run_response = await client.post_run(req_body={"data": {}}) + run_id = create_run_response.json()["data"]["id"] + + run = (await client.get_run(run_id)).json()["data"] + assert run["status"] == "idle" + assert run["current"] is True + # Some loss of state is expected. + expected_run = deepcopy(run) + expected_run["status"] = "stopped" + expected_run["current"] = False + expected_run["ok"] = False + expected_run["dataError"] = { + "id": "RunDataError", + "title": "Run Data Error", + "detail": "There was no engine state data for this run.", + "meta": { + "code": "4008", + "detail": {}, + "message": "There was no engine state data for this run.", + "type": "InvalidStoredData", + "wrapping": [], + }, + "errorCode": "4008", + } + + # reboot the server + await client_and_server.restart() + + await _assert_run_persisted(robot_client=client, expected_run_data=expected_run) + + +async def test_runs_persist_via_patch(client_and_server: ClientServerFixture) -> None: """Test that runs are persisted through dev server restart.""" client, server = client_and_server @@ -100,10 +140,7 @@ async def test_runs_persist_via_actions_router( """Test that runs commands and state are persisted when calling play action through dev server restart.""" client, server = client_and_server - # await client.post_protocol([Path("./tests/integration/protocols/simple.py")]) - # - # protocols = (await client.get_protocols()).json()["data"] - # protocol_id = protocols[0]["id"] + # create a run create_run_response = await client.post_run(req_body={"data": {}}) run_id = create_run_response.json()["data"]["id"]