Skip to content

Commit

Permalink
fix(robot-server): Update tests to properly check new bad run records (
Browse files Browse the repository at this point in the history
…#14723)

# Overview

Follow-ups for
#14711 (comment).

#14711 added safer error propagation for when robot-server encounters
bad stored run data. As part of that, if it finds a run where the
`state_summary` SQL column is `NULL`, it treats that as bad data and
propagates the error to HTTP clients.

When you restart the robot while there is an active run, then no state
summary will be inserted (this only happens when the run is ended and
the state moves from engine to sql) and the run will be bad.

We say that this is in fact a bad run because to the client, there is no
distinction between state summary and run. A run with an empty state
summary does not have correct data and does not represent what occurred.

Add a regression test to make sure this is how we handle runs that did
not have state summaries persisted.

# Testing
- [x] create a run with this branch on a flex and restart the flex (or
kill the robot server process - this isn't about the details of when
things are written to disk, just the lifetime of the data here) and see
that the run is now bad

---------

Co-authored-by: Seth Foster <[email protected]>
  • Loading branch information
SyntaxColoring and sfoster1 authored Mar 25, 2024
1 parent 29414e2 commit 244a80e
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 86 deletions.
2 changes: 1 addition & 1 deletion robot-server/robot_server/runs/run_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class RunDataError(ErrorDetails):
"""A model for an error loading a run."""

title: str = Field(
"Run Loading Error",
"Run Data Error",
description="A short, human readable name for this type of error",
)
id: Literal["RunDataError"] = "RunDataError"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from shutil import copytree
from tempfile import TemporaryDirectory
from typing import List
from typing import List, Literal, Union, TYPE_CHECKING

import anyio
import pytest
Expand All @@ -12,6 +14,9 @@

from .persistence_snapshots_dir import PERSISTENCE_SNAPSHOTS_DIR

if TYPE_CHECKING:
from _pytest.mark import ParameterSet

# Allow plenty of time for database migrations, which can take a while in our CI runners.
_STARTUP_TIMEOUT = 60

Expand All @@ -28,6 +33,14 @@
class Run:
id: str
expected_command_count: int
ok: Literal[True] = True


@dataclass
class BadRun:
id: str
expected_command_count: int
ok: Literal[False] = False


@dataclass
Expand All @@ -36,7 +49,7 @@ class Snapshot:

version: str
expected_protocol_count: int
expected_runs: List[Run]
expected_runs: List[Union[Run, BadRun]]
protocols_with_no_analyses: List[str] = field(default_factory=list)

def get_copy(self) -> Path:
Expand All @@ -58,86 +71,103 @@ def get_copy(self) -> Path:
)


snapshots: List[(Snapshot)] = [
Snapshot(
version="v6.0.1",
expected_protocol_count=4,
expected_runs=[
Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
Run("467761f3-7339-4b8d-9007-4482500657da", 65),
Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
],
snapshots: List[ParameterSet] = [
pytest.param(
Snapshot(
version="v6.0.1",
expected_protocol_count=4,
expected_runs=[
Run("7bc1f20d-3925-4aa2-b200-82906112816f", 23),
Run("1b00190c-013f-463d-b371-5bf49b6ad61f", 16),
Run("8165be3f-382f-4b1f-97d7-f3c4ae613868", 65),
Run("467761f3-7339-4b8d-9007-4482500657da", 65),
Run("f7817fa9-bc80-45c0-afea-f7c4af30a663", 333),
],
),
id="v6.0.1",
),
Snapshot(
version="v6.1.0",
expected_protocol_count=2,
expected_runs=[
Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
],
pytest.param(
Snapshot(
version="v6.1.0",
expected_protocol_count=2,
expected_runs=[
Run("a4338d46-96af-4e23-877d-1d79227a0946", 147),
Run("efc7374f-2e64-45ea-83fe-bd7a55f2699e", 205),
],
),
id="v6.0.1",
),
Snapshot(
version="v6.2.0",
expected_protocol_count=2,
expected_runs=[
Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
],
pytest.param(
Snapshot(
version="v6.2.0",
expected_protocol_count=2,
expected_runs=[
Run("199b991d-db3c-49ff-9b4f-905118c10685", 125),
Run("25a66ec6-2137-4680-8a94-d53c0e2a7488", 87),
],
),
id="v6.2.0",
),
Snapshot(
version="v6.2.0_large",
expected_protocol_count=17,
expected_runs=[
Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
Run("0b97477c-844d-406a-87e8-0852421d7212", 0),
Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
Run("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
Run("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
],
protocols_with_no_analyses=[
"429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
"e3515d46-3c3b-425b-8734-bd6e38d6a729",
],
pytest.param(
Snapshot(
version="v6.2.0_large",
expected_protocol_count=17,
expected_runs=[
Run("eeb17dc0-1878-432a-bf3f-33e7d3023b8d", 218),
Run("917cf0f8-8b79-47ab-a407-918c182eb6df", 125),
Run("7b87bac2-680a-4757-a10f-8341a6dce540", 185),
BadRun("0b97477c-844d-406a-87e8-0852421d7212", 0),
Run("f31659a6-33c9-406d-beb5-da2ec19ef063", 120),
Run("965b45f4-f296-44bf-ae20-df297d3a35af", 8),
Run("b97b0ee8-2ba4-43cd-99aa-601b60f5b75d", 13),
Run("7dd90a28-14b6-4e6f-86a8-41ca6e6e42ae", 11),
Run("dc9162c2-f9f6-48aa-a923-7ba252d3eb1d", 15),
BadRun("2d9b6f1b-e2fd-40a9-9219-504df2c89305", 0),
# This is a run of a protocol with no commands
Run("9ba966c6-bc2f-4c65-b898-59a4f2530f35", 0),
BadRun("5f30a0dd-e4da-4f24-abce-7468067d883a", 0),
# Stopped early
Run("83f0bad0-6bb2-4ecd-bccf-f14667298168", 0),
Run("0b97363d-0910-43a0-b5a2-b6a62ad2fa6b", 96),
Run("35c014ec-b6ea-4665-8149-5c6340cbc5ca", 0),
Run("d2b68ac6-5c4f-4914-bc2e-f306a976d582", 220),
],
protocols_with_no_analyses=[
"429e72e1-6ff1-4328-8a1d-c13fe3ac0c80",
"e3515d46-3c3b-425b-8734-bd6e38d6a729",
],
),
id="v6.2.0_large",
),
flex_dev_compat_snapshot,
Snapshot(
version="v7.1.1",
expected_protocol_count=10,
expected_runs=[
Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
Run("04ec9eda-19b2-4850-9148-d28112565b37", 0),
Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
Run("4f623a64-20ce-464b-a118-e8a785911613", 0),
Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
Run("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
Run("62011896-29f5-40b4-83ee-29d7b7817583", 0),
Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
Run("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
Run("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
Run("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
Run("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
],
pytest.param(flex_dev_compat_snapshot, id="flex_dev_compat"),
pytest.param(
Snapshot(
version="v7.1.1",
expected_protocol_count=10,
expected_runs=[
Run("69fe2d6f-3bda-4dfb-800b-cd93017d1cbd", 4634),
BadRun("04ec9eda-19b2-4850-9148-d28112565b37", 0),
Run("7edf736e-2b5c-41c0-be37-7ab7ac215445", 787),
BadRun("4f623a64-20ce-464b-a118-e8a785911613", 0),
Run("237fd93f-e4a5-4c37-9675-58a8a3c32bbb", 953),
Run("59706cac-74d5-4542-8b38-499d11ad352e", 54),
BadRun("ef7794a5-3afd-438d-a69e-34138d9ae520", 0),
Run("b0c6a8fa-f117-4f5f-b5f0-22c487d83526", 359),
BadRun("62011896-29f5-40b4-83ee-29d7b7817583", 0),
Run("790d551d-68f0-4513-8896-bc175f629546", 1541),
BadRun("92dafa40-3425-4a74-9d20-a3fc08365a92", 0),
Run("7622aed6-08bf-4339-accc-952dcad310ce", 205),
BadRun("b710a6c2-d373-4bc1-ad14-18f1094d7104", 0),
Run("0b593bb0-d2d8-4c21-afc5-44e4868aeeef", 1609),
BadRun("22d99b67-3062-48ed-80bd-4505def1bb7d", 0),
Run("519a45e1-f68a-454f-bac9-0910eaddbbac", 18),
Run("7367493c-40b1-4516-abf5-9c5b0228d27f", 679),
Run("4af7e324-2f2b-40bc-803c-e9100016d2b3", 1183),
Run("e164059b-57dc-4a68-a23b-b026a7addf2a", 1467),
BadRun("ae2e23fc-74fb-4b3f-9b8b-d632e31b222a", 0),
],
),
id="v7.1.1",
),
]

Expand Down Expand Up @@ -198,7 +228,11 @@ async def test_protocols_analyses_and_runs_available_from_older_persistence_dir(
assert all_run_ids == [r.id for r in snapshot.expected_runs]

for expected_run in snapshot.expected_runs:
await robot_client.get_run(run_id=expected_run.id)
run = (await robot_client.get_run(run_id=expected_run.id)).json()
if expected_run.ok:
assert run["data"].get("dataError") is None
else:
assert run["data"].get("dataError") is not None

all_command_summaries = (
await robot_client.get_run_commands(
Expand Down
49 changes: 43 additions & 6 deletions robot-server/tests/integration/http_api/runs/test_persistence.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, AsyncGenerator, Dict, NamedTuple, cast
from copy import deepcopy
from datetime import datetime
from typing import Any, AsyncGenerator, Dict, NamedTuple, cast

import anyio
import pytest
Expand Down Expand Up @@ -73,7 +74,46 @@ async def _assert_run_persisted(
assert get_persisted_run_response.json()["data"] == expected_run_data


async def test_runs_persist(client_and_server: ClientServerFixture) -> None:
async def test_untimely_restart_marks_runs_bad(
client_and_server: ClientServerFixture,
) -> None:
"""Test that a run persists even if the server was restarted before the run was
gracefully closed out."""
client, server = client_and_server

# create a run
create_run_response = await client.post_run(req_body={"data": {}})
run_id = create_run_response.json()["data"]["id"]

run = (await client.get_run(run_id)).json()["data"]
assert run["status"] == "idle"
assert run["current"] is True
# Some loss of state is expected.
expected_run = deepcopy(run)
expected_run["status"] = "stopped"
expected_run["current"] = False
expected_run["ok"] = False
expected_run["dataError"] = {
"id": "RunDataError",
"title": "Run Data Error",
"detail": "There was no engine state data for this run.",
"meta": {
"code": "4008",
"detail": {},
"message": "There was no engine state data for this run.",
"type": "InvalidStoredData",
"wrapping": [],
},
"errorCode": "4008",
}

# reboot the server
await client_and_server.restart()

await _assert_run_persisted(robot_client=client, expected_run_data=expected_run)


async def test_runs_persist_via_patch(client_and_server: ClientServerFixture) -> None:
"""Test that runs are persisted through dev server restart."""
client, server = client_and_server

Expand All @@ -100,10 +140,7 @@ async def test_runs_persist_via_actions_router(
"""Test that runs commands and state
are persisted when calling play action through dev server restart."""
client, server = client_and_server
# await client.post_protocol([Path("./tests/integration/protocols/simple.py")])
#
# protocols = (await client.get_protocols()).json()["data"]
# protocol_id = protocols[0]["id"]

# create a run
create_run_response = await client.post_run(req_body={"data": {}})
run_id = create_run_response.json()["data"]["id"]
Expand Down

0 comments on commit 244a80e

Please sign in to comment.