From ba717a6da36864ca4b937d7f23f30e134f9e8448 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 17 Aug 2023 16:01:37 -0700 Subject: [PATCH 1/4] Simplify the logic when setting trust mode in a datastore for testing --- tests/test_butler.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_butler.py b/tests/test_butler.py index 9bebae1202..3703b2d67a 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -2056,12 +2056,10 @@ def testTransferUuidToUuid(self) -> None: self.assertButlerTransfers() def _enable_trust(self, datastore: Datastore) -> None: - if hasattr(datastore, "trustGetRequest"): - datastore.trustGetRequest = True - elif hasattr(datastore, "datastores"): - for this_datastore in datastore.datastores: - if hasattr(this_datastore, "trustGetRequest"): - this_datastore.trustGetRequest = True + datastores = getattr(datastore, "datastores", [datastore]) + for this_datastore in datastores: + if hasattr(this_datastore, "trustGetRequest"): + this_datastore.trustGetRequest = True def testTransferMissing(self) -> None: """Test transfers where datastore records are missing. From a2dbe3f9fee2f8829ec6d197c7171c90138b2b26 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 17 Aug 2023 16:01:59 -0700 Subject: [PATCH 2/4] Fall back on exists() if datastore doesn't know about the deferred ref This is needed for execution butler where datastore doesn't know about files that do exist. --- python/lsst/daf/butler/_butler.py | 9 +++++++-- tests/test_butler.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index c70c2e3780..c4e77de83a 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1341,9 +1341,14 @@ def getDeferred( Raised if no collections were provided. 
""" if isinstance(datasetRefOrType, DatasetRef): - if not self._datastore.knows(datasetRefOrType): + # Do the quick check first and if that fails, check for artifact + # existence. This is necessary for datastores that are configured + # in trust mode where there won't be a record but there will be + # a file. + if self._datastore.knows(datasetRefOrType) or self._datastore.exists(datasetRefOrType): + ref = datasetRefOrType + else: raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.") - ref = datasetRefOrType else: ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass) diff --git a/tests/test_butler.py b/tests/test_butler.py index 3703b2d67a..a9794af46d 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -1559,6 +1559,11 @@ def testPruneDatasets(self) -> None: for ref in refs: butler.put(metric, ref) + # Confirm we can retrieve deferred. + dref1 = butler.getDeferred(ref1) # known and exists + metric1 = dref1.get() + self.assertEqual(metric1, metric) + # Test different forms of file availability. # Need to be in a state where: # - one ref just has registry record. @@ -1603,6 +1608,14 @@ def testPruneDatasets(self) -> None: for ref, exists in exists_many.items(): self.assertEqual(butler.exists(ref, full_check=False), exists) + # Get deferred checks for existence before it allows it to be + # retrieved. + with self.assertRaises(LookupError): + butler.getDeferred(ref3) # not known, file exists + dref2 = butler.getDeferred(ref2) # known but file missing + with self.assertRaises(FileNotFoundError): + dref2.get() + # Test again with a trusting butler. 
butler._datastore.trustGetRequest = True exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) @@ -1611,6 +1624,12 @@ def testPruneDatasets(self) -> None: self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) + # When trusting we can get a deferred dataset handle that is not + # known but does exist. + dref3 = butler.getDeferred(ref3) + metric3 = dref3.get() + self.assertEqual(metric3, metric) + # Check that per-ref query gives the same answer as many query. for ref, exists in exists_many.items(): self.assertEqual(butler.exists(ref, full_check=True), exists) From 86ee7ec5b99eb7972fc09f79b7f164fbc62ee12e Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Thu, 17 Aug 2023 16:25:10 -0700 Subject: [PATCH 3/4] Fix for ruff complaint --- .pre-commit-config.yaml | 4 ++-- pyproject.toml | 8 +++++--- python/lsst/daf/butler/core/dimensions/_records.py | 4 +++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 907e850f6c..a5a5eb5949 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black # It is recommended to specify the latest version of Python @@ -23,6 +23,6 @@ repos: name: isort (python) - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.277 + rev: v0.0.285 hooks: - id: ruff diff --git a/pyproject.toml b/pyproject.toml index fa9e804e90..e29e24aab4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,9 +180,11 @@ select = [ "D", # pydocstyle ] target-version = "py311" -extend-select = [ - "RUF100", # Warn about unused noqa -] +# Commented out to suppress "unused noqa" in jenkins which has older ruff not +# generating E721. 
+#extend-select = [ +# "RUF100", # Warn about unused noqa +#] [tool.ruff.per-file-ignores] # parserYacc docstrings can not be fixed. Docstrings are used to define grammar. diff --git a/python/lsst/daf/butler/core/dimensions/_records.py b/python/lsst/daf/butler/core/dimensions/_records.py index 0909539c3e..5da3e6b495 100644 --- a/python/lsst/daf/butler/core/dimensions/_records.py +++ b/python/lsst/daf/butler/core/dimensions/_records.py @@ -190,7 +190,9 @@ def direct( # This method requires tuples as values of the mapping, but JSON # readers will read things in as lists. Be kind and transparently # transform to tuples - _recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore + _recItems = { + k: v if type(v) != list else tuple(v) for k, v in record.items() # type: ignore # noqa: E721 + } # Type ignore because the ternary statement seems to confuse mypy # based on conflicting inferred types of v. From 40e041c4e13a0f702c09e13858bab583a1e1fffb Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 17 Aug 2023 12:42:09 -0700 Subject: [PATCH 4/4] Move StrictInt ahead of StrictFloat in model In newer pydantic this converts a 0 to 0.0 when serializing to JSON, and this breaks when reading back a record that is meant to be an integer. This behavior changed in pydantic 2.2, whereas 2.1 behaved like 1.0. This happens because the dynamic subclasses apply dynamic constraints based on the specific record data model, so they know that something must be an integer.
--- python/lsst/daf/butler/core/dimensions/_records.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/core/dimensions/_records.py b/python/lsst/daf/butler/core/dimensions/_records.py index 5da3e6b495..6de6a99954 100644 --- a/python/lsst/daf/butler/core/dimensions/_records.py +++ b/python/lsst/daf/butler/core/dimensions/_records.py @@ -146,7 +146,7 @@ class SerializedDimensionRecord(_BaseModelCompat): ) # Use strict types to prevent casting - record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field( + record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field( ..., title="Dimension record keys and values.", examples=[