Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-40435: Add fallback check for existence with getDeferred #881

Merged
merged 4 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 23.3.0
rev: 23.7.0
hooks:
- id: black
# It is recommended to specify the latest version of Python
Expand All @@ -23,6 +23,6 @@ repos:
name: isort (python)
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.277
rev: v0.0.285
hooks:
- id: ruff
8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,11 @@ select = [
"D", # pydocstyle
]
target-version = "py311"
extend-select = [
"RUF100", # Warn about unused noqa
]
# Commented out to suppress "unused noqa" in jenkins which has older ruff not
# generating E721.
#extend-select = [
# "RUF100", # Warn about unused noqa
#]

[tool.ruff.per-file-ignores]
# parserYacc docstrings can not be fixed. Docstrings are used to define grammar.
Expand Down
9 changes: 7 additions & 2 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,9 +1341,14 @@ def getDeferred(
Raised if no collections were provided.
"""
if isinstance(datasetRefOrType, DatasetRef):
if not self._datastore.knows(datasetRefOrType):
# Do the quick check first and if that fails, check for artifact
# existence. This is necessary for datastores that are configured
# in trust mode where there won't be a record but there will be
# a file.
if self._datastore.knows(datasetRefOrType) or self._datastore.exists(datasetRefOrType):
ref = datasetRefOrType
else:
raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
ref = datasetRefOrType
else:
ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
Expand Down
6 changes: 4 additions & 2 deletions python/lsst/daf/butler/core/dimensions/_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class SerializedDimensionRecord(_BaseModelCompat):
)

# Use strict types to prevent casting
record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field(
...,
title="Dimension record keys and values.",
examples=[
Expand Down Expand Up @@ -190,7 +190,9 @@ def direct(
# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform to tuples
_recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore
_recItems = {
k: v if type(v) != list else tuple(v) for k, v in record.items() # type: ignore # noqa: E721
}

# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
Expand Down
29 changes: 23 additions & 6 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,11 @@ def testPruneDatasets(self) -> None:
for ref in refs:
butler.put(metric, ref)

# Confirm we can retrieve deferred.
dref1 = butler.getDeferred(ref1) # known and exists
metric1 = dref1.get()
self.assertEqual(metric1, metric)

# Test different forms of file availability.
# Need to be in a state where:
# - one ref just has registry record.
Expand Down Expand Up @@ -1603,6 +1608,14 @@ def testPruneDatasets(self) -> None:
for ref, exists in exists_many.items():
self.assertEqual(butler.exists(ref, full_check=False), exists)

# Get deferred checks for existence before it allows it to be
# retrieved.
with self.assertRaises(LookupError):
butler.getDeferred(ref3) # not known, file exists
dref2 = butler.getDeferred(ref2) # known but file missing
with self.assertRaises(FileNotFoundError):
dref2.get()

# Test again with a trusting butler.
butler._datastore.trustGetRequest = True
exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
Expand All @@ -1611,6 +1624,12 @@ def testPruneDatasets(self) -> None:
self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)

# When trusting we can get a deferred dataset handle that is not
# known but does exist.
dref3 = butler.getDeferred(ref3)
metric3 = dref3.get()
self.assertEqual(metric3, metric)

# Check that per-ref query gives the same answer as many query.
for ref, exists in exists_many.items():
self.assertEqual(butler.exists(ref, full_check=True), exists)
Expand Down Expand Up @@ -2056,12 +2075,10 @@ def testTransferUuidToUuid(self) -> None:
self.assertButlerTransfers()

def _enable_trust(self, datastore: Datastore) -> None:
if hasattr(datastore, "trustGetRequest"):
datastore.trustGetRequest = True
elif hasattr(datastore, "datastores"):
for this_datastore in datastore.datastores:
if hasattr(this_datastore, "trustGetRequest"):
this_datastore.trustGetRequest = True
datastores = getattr(datastore, "datastores", [datastore])
for this_datastore in datastores:
if hasattr(this_datastore, "trustGetRequest"):
this_datastore.trustGetRequest = True

def testTransferMissing(self) -> None:
"""Test transfers where datastore records are missing.
Expand Down
Loading