From 9ab55846e5738c1b4177c7a973eb6d3eab816490 Mon Sep 17 00:00:00 2001 From: Orion Eiger Date: Wed, 18 Sep 2024 12:37:07 -0700 Subject: [PATCH] Change status Literals to Enums --- .../pipe/base/quantum_provenance_graph.py | 154 ++++++++++-------- 1 file changed, 90 insertions(+), 64 deletions(-) diff --git a/python/lsst/pipe/base/quantum_provenance_graph.py b/python/lsst/pipe/base/quantum_provenance_graph.py index faa926bb..c13ff48d 100644 --- a/python/lsst/pipe/base/quantum_provenance_graph.py +++ b/python/lsst/pipe/base/quantum_provenance_graph.py @@ -43,7 +43,8 @@ import logging import uuid from collections.abc import Iterator, Sequence -from typing import TYPE_CHECKING, ClassVar, Literal, NamedTuple, TypeAlias, TypedDict, cast +from enum import Enum +from typing import TYPE_CHECKING, ClassVar, Literal, NamedTuple, TypedDict, cast import networkx import pydantic @@ -132,7 +133,28 @@ class PrerequisiteDatasetKey(NamedTuple): """ -QuantumRunStatus: TypeAlias = Literal["failed", "successful", "logs_missing", "blocked", "metadata_missing"] +class QuantumRunStatus(Enum): + """Enum describing the status of a quantum-run collection combination. + + Possible Statuses + ----------------- + metadata_missing = -3: Metadata is missing for this quantum in this run. + It is impossible to tell whether execution of this quantum was + attempted due to missing metadata. + logs_missing = -2: Logs are missing for this quantum in this run. It was + attempted, but it is impossible to tell if it succeeded or failed due + to missing logs. + failed = -1: Attempts to execute the quantum failed in this run. + blocked = 0: This run does not include an executed version of this + quantum because an upstream task failed. + successful = 1: This quantum was executed successfully in this run. 
+ """ + + metadata_missing = -3 + logs_missing = -2 + failed = -1 + blocked = 0 + successful = 1 class QuantumRun(pydantic.BaseModel): @@ -144,23 +166,48 @@ class QuantumRun(pydantic.BaseModel): status: QuantumRunStatus = "metadata_missing" """The status of the quantum in that run. + """ + + +class QuantumInfoStatus(Enum): + """The status of a quantum (a particular task run on a particular dataID) + across all runs. Possible Statuses ----------------- - `failed`: Attempts to execute the quantum failed in this run. - `successful`: This quantum was executed successfully in this run. - `logs_missing`: Logs are missing for this quantum in this run. It was - attempted, but it is impossible to tell if it succeeded or failed due - to missing logs. - `blocked`: This run does not include an executed version of this quantum - because an upstream task failed. - `metadata_missing`: Metadata is missing for this quantum in this run. It is - impossible to tell whether execution of this quantum was attempted due - to missing metadata. + wonky = -3: The overall state of this quantum reflects inconsistencies or + is difficult to discern. There are a few specific ways to enter a wonky + state; it is impossible to exit and requires human intervention to + proceed with processing. + Currently, a quantum enters a wonky state for one of three reasons: + - Its `QuantumInfoStatus` exits a successful state. Something that + initially succeeded fails on subsequent attempts. + - A `QuantumRun` is missing logs. + - There are multiple runs associated with a dataset which comes up in a + findFirst search. This means that a dataset which will be used as an + input data product for further processing has heterogeneous inputs, + which may have had different inputs or a different data-query. + failed = -2: These quanta were attempted and failed. Failed quanta have + logs and no metadata. 
+ unknown = -1: These are quanta which do not have any metadata associated + with processing, but for which it is impossible to tell the status due + to an additional absence of logs. Quanta which had not been processed + at all would reflect this state, as would quanta which were + conceptualized in the construction of the quantum graph but later + identified to be unnecessary or erroneous (deemed NoWorkFound by the + Science Pipelines). + blocked = 0: The quantum is not able to execute because its inputs are + missing due to an upstream failure. Blocked quanta are distinguished + from failed quanta by being successors of failed quanta in the graph. + All the successors of blocked quanta are also marked as blocked. + successful = 1: Attempts at executing this quantum were successful. """ - -QuantumInfoStatus: TypeAlias = Literal["successful", "wonky", "blocked", "unknown", "failed"] + wonky = -3 + failed = -2 + unknown = -1 + blocked = 0 + successful = 1 class QuantumInfo(TypedDict): @@ -181,35 +228,6 @@ class QuantumInfo(TypedDict): status: QuantumInfoStatus """The overall status of the quantum. Note that it is impossible to exit a wonky state. - - Possible Statuses - ----------------- - `successful`: Attempts at executing this quantum were successful. - `wonky`: The overall state of this quantum reflects inconsistencies or is - difficult to discern. There are a few specific ways to enter a wonky - state; it is impossible to exit and requires human intervention to - proceed with processing. - Currently, a quantum enters a wonky state for one of three reasons: - - Its `QuantumInfoStatus` exits a successful state. Something that - initially succeeded fails on - - A `QuantumRun` is missing logs. - - There are multiple runs associated with a dataset which comes up in a - findFirst search. 
This means that a dataset which will be used as an - input data product for further processing has heterogeneous inputs, - which may have had different inputs or a different data-query. - `blocked`: The quantum is not able to execute because its inputs are - missing due to an upstream failure. Blocked quanta are distinguished - from failed quanta by being successors of failed quanta in the graph. - All the successors of blocked quanta are also marked as blocked. - `unknown`: These are quanta which do not have any metadata associated - with processing, but for which it is impossible to tell the status due - to an additional absence of logs. Quanta which had not been processed - at all would reflect this state, as would quanta which were - conceptualized in the construction of the quantum graph but later - identified to be unneccesary or erroneous (deemed `NoWorkFound` by the - Science Pipelines). - `failed`: These quanta were attempted and failed. Failed quanta have logs - and no metadata. """ recovered: bool @@ -257,7 +275,35 @@ def _validate(self) -> DatasetRun: return self -DatasetInfoStatus: TypeAlias = Literal["visible", "shadowed", "predicted_only", "unsuccessful", "cursed"] +class DatasetInfoStatus(Enum): + """Status of the DatasetType-dataID pair over all runs. + + Possible Statuses + ----------------- + cursed: The dataset was the result of an unsuccessful quantum and was + visible in the output collection anyway. These are flagged as + cursed so that they may be caught before they become inputs to + further processing. + unsuccessful: The dataset was not produced. These are the results of + failed or blocked quanta. + predicted_only: The dataset was predicted, and was not visible in any + run, but was the successor of a successful quantum. These datasets are + the result of pipelines NoWorkFound cases, in which a dataset is + predicted in the graph but found to not be necessary in processing. 
+ shadowed: The dataset exists but is not queryable in a find_first + search. This could mean that the version of this dataset which is + passed as an input to further processing is not in the collections + given. A shadowed dataset will not be used as an input to further + processing. + visible: The dataset is queryable in a find_first search. This means + that it can be used as an input by subsequent tasks and processing. + """ + + cursed = -2 + unsuccessful = -1 + predicted_only = 0 + shadowed = 1 + visible = 2 class DatasetInfo(TypedDict): @@ -276,26 +322,6 @@ class DatasetInfo(TypedDict): status: DatasetInfoStatus """Overall status of the dataset. - - Possible Statuses - ----------------- - `visible`: The dataset is queryable in a find_first search. This means - that it can be used as an input by subsequent tasks and processing. - `shadowed`: The dataset exists but is not queryable in a find_first - search. This could mean that the version of this dataset which is - passed as an input to further processing is not in the collections - given. A `shadowed` dataset will not be used as an input to further - processing. - `predicted_only`: The dataset was predicted, and was not visible in any - run, but was the successor of a successful quantum. These datasets are - the result of pipelines `NoWorkFound` cases, in which a dataset is - predicted in the graph but found to not be necessary in processing. - `unsuccessful`: The dataset was not produced. These are the results of - failed or blocked quanta. - `cursed`: The dataset was the result of an unsuccessful quantum and was - visible in the output collection anyway. These are flagged as - `cursed` so that they may be caught before they become inputs to - further processing. """ messages: list[str]