Merge pull request #881 from lsst/tickets/DM-40435
DM-40435: Add fallback check for existence with getDeferred
timj committed Aug 18, 2023
2 parents 87d628b + 40e041c commit c6c2f70
Showing 5 changed files with 41 additions and 15 deletions.
.pre-commit-config.yaml (4 changes: 2 additions & 2 deletions)
@@ -8,7 +8,7 @@ repos:
       - id: end-of-file-fixer
       - id: trailing-whitespace
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black
         # It is recommended to specify the latest version of Python
@@ -23,6 +23,6 @@ repos:
         name: isort (python)
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.0.277
+    rev: v0.0.285
     hooks:
       - id: ruff
pyproject.toml (8 changes: 5 additions & 3 deletions)
@@ -180,9 +180,11 @@ select = [
     "D", # pydocstyle
 ]
 target-version = "py311"
-extend-select = [
-    "RUF100", # Warn about unused noqa
-]
+# Commented out to suppress "unused noqa" in jenkins which has older ruff not
+# generating E721.
+#extend-select = [
+#    "RUF100", # Warn about unused noqa
+#]

 [tool.ruff.per-file-ignores]
 # parserYacc docstrings can not be fixed. Docstrings are used to define grammar.
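
As background for the ruff comment above, here is a minimal sketch of the interaction being described, using invented values rather than code from this repository: ruff v0.0.285 reports E721 ("do not compare types") for the comparison below and therefore requires the noqa suppression, while the older ruff still installed in Jenkins does not emit E721 there, so with RUF100 enabled it would flag that same suppression as an unused noqa.

    # Illustrative only: newer ruff wants the suppression, older ruff calls it unused.
    values = {"numbers": [1, 2], "name": "example"}
    as_tuples = {k: v if type(v) != list else tuple(v) for k, v in values.items()}  # noqa: E721
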
python/lsst/daf/butler/_butler.py (9 changes: 7 additions & 2 deletions)
@@ -1341,9 +1341,14 @@ def getDeferred(
             Raised if no collections were provided.
         """
         if isinstance(datasetRefOrType, DatasetRef):
-            if not self._datastore.knows(datasetRefOrType):
+            # Do the quick check first and if that fails, check for artifact
+            # existence. This is necessary for datastores that are configured
+            # in trust mode where there won't be a record but there will be
+            # a file.
+            if self._datastore.knows(datasetRefOrType) or self._datastore.exists(datasetRefOrType):
+                ref = datasetRefOrType
+            else:
                 raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
-            ref = datasetRefOrType
         else:
             ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
         return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
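
A rough usage sketch of what the new fallback allows, assuming an already-constructed butler and a DatasetRef named ref whose file is present in a trust-mode datastore even though there is no datastore record (the variable names here are illustrative, not taken from the commit):

    # Previously this raised LookupError because only Datastore.knows() was
    # consulted; with the Datastore.exists() fallback a handle is returned and
    # the actual read is deferred until get() is called.
    handle = butler.getDeferred(ref)
    dataset = handle.get()
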
python/lsst/daf/butler/core/dimensions/_records.py (6 changes: 4 additions & 2 deletions)
@@ -146,7 +146,7 @@ class SerializedDimensionRecord(_BaseModelCompat):
     )

     # Use strict types to prevent casting
-    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
+    record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field(
         ...,
         title="Dimension record keys and values.",
         examples=[
@@ -190,7 +190,9 @@ def direct(
         # This method requires tuples as values of the mapping, but JSON
         # readers will read things in as lists. Be kind and transparently
         # transform to tuples
-        _recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore
+        _recItems = {
+            k: v if type(v) != list else tuple(v) for k, v in record.items() # type: ignore # noqa: E721
+        }

         # Type ignore because the ternary statement seems to confuse mypy
         # based on conflicting inferred types of v.
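
A toy illustration (invented data, not from the repository) of what the reformatted comprehension in direct() does: JSON decodes tuple-valued fields as lists, so list values are converted back to tuples while every other value passes through unchanged.

    # Mimics a deserialized record mapping with one list value.
    record = {"instrument": "HSC", "detector": 42, "bounds": [10, 20]}
    _recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # noqa: E721
    # _recItems == {"instrument": "HSC", "detector": 42, "bounds": (10, 20)}
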
tests/test_butler.py (29 changes: 23 additions & 6 deletions)
@@ -1559,6 +1559,11 @@ def testPruneDatasets(self) -> None:
         for ref in refs:
             butler.put(metric, ref)

+        # Confirm we can retrieve deferred.
+        dref1 = butler.getDeferred(ref1) # known and exists
+        metric1 = dref1.get()
+        self.assertEqual(metric1, metric)
+
         # Test different forms of file availability.
         # Need to be in a state where:
         # - one ref just has registry record.
@@ -1603,6 +1608,14 @@ def testPruneDatasets(self) -> None:
         for ref, exists in exists_many.items():
             self.assertEqual(butler.exists(ref, full_check=False), exists)

+        # Get deferred checks for existence before it allows it to be
+        # retrieved.
+        with self.assertRaises(LookupError):
+            butler.getDeferred(ref3) # not known, file exists
+        dref2 = butler.getDeferred(ref2) # known but file missing
+        with self.assertRaises(FileNotFoundError):
+            dref2.get()
+
         # Test again with a trusting butler.
         butler._datastore.trustGetRequest = True
         exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
@@ -1611,6 +1624,12 @@ def testPruneDatasets(self) -> None:
         self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
         self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)

+        # When trusting we can get a deferred dataset handle that is not
+        # known but does exist.
+        dref3 = butler.getDeferred(ref3)
+        metric3 = dref3.get()
+        self.assertEqual(metric3, metric)
+
         # Check that per-ref query gives the same answer as many query.
         for ref, exists in exists_many.items():
             self.assertEqual(butler.exists(ref, full_check=True), exists)
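
Taken together, and reading the assertions rather than quoting the commit, the intended getDeferred() behaviour is: a ref the datastore knows returns a handle whose get() succeeds; a known ref with a missing artifact still returns a handle, but get() raises FileNotFoundError; an unknown ref whose artifact exists makes getDeferred() itself raise LookupError, unless the datastore trusts artifact existence, in which case the fallback check accepts it and get() succeeds.
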
@@ -2056,12 +2075,10 @@ def testTransferUuidToUuid(self) -> None:
         self.assertButlerTransfers()

     def _enable_trust(self, datastore: Datastore) -> None:
-        if hasattr(datastore, "trustGetRequest"):
-            datastore.trustGetRequest = True
-        elif hasattr(datastore, "datastores"):
-            for this_datastore in datastore.datastores:
-                if hasattr(this_datastore, "trustGetRequest"):
-                    this_datastore.trustGetRequest = True
+        datastores = getattr(datastore, "datastores", [datastore])
+        for this_datastore in datastores:
+            if hasattr(this_datastore, "trustGetRequest"):
+                this_datastore.trustGetRequest = True

     def testTransferMissing(self) -> None:
         """Test transfers where datastore records are missing.
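
A note on the _enable_trust() rewrite above: using getattr(datastore, "datastores", [datastore]) treats a plain datastore as a one-element list of itself, so chained datastores (which expose a datastores attribute) and single datastores flow through the same loop instead of the previous if/elif branches.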
