Skip to content

Commit

Permalink
Fall back on exists() if datastore doesn't know about the deferred ref
Browse files Browse the repository at this point in the history
This is needed for the execution butler, where the datastore doesn't
know about files that do exist.
  • Loading branch information
timj committed Aug 18, 2023
1 parent ba717a6 commit a2dbe3f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
9 changes: 7 additions & 2 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,9 +1341,14 @@ def getDeferred(
Raised if no collections were provided.
"""
if isinstance(datasetRefOrType, DatasetRef):
if not self._datastore.knows(datasetRefOrType):
# Do the quick check first and if that fails, check for artifact
# existence. This is necessary for datastores that are configured
# in trust mode where there won't be a record but there will be
# a file.
if self._datastore.knows(datasetRefOrType) or self._datastore.exists(datasetRefOrType):
ref = datasetRefOrType
else:
raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
ref = datasetRefOrType
else:
ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,11 @@ def testPruneDatasets(self) -> None:
for ref in refs:
butler.put(metric, ref)

# Confirm we can retrieve deferred.
dref1 = butler.getDeferred(ref1) # known and exists
metric1 = dref1.get()
self.assertEqual(metric1, metric)

# Test different forms of file availability.
# Need to be in a state where:
# - one ref just has registry record.
Expand Down Expand Up @@ -1603,6 +1608,14 @@ def testPruneDatasets(self) -> None:
for ref, exists in exists_many.items():
self.assertEqual(butler.exists(ref, full_check=False), exists)

# getDeferred checks for existence before it allows the dataset to be
# retrieved.
with self.assertRaises(LookupError):
butler.getDeferred(ref3) # not known, file exists
dref2 = butler.getDeferred(ref2) # known but file missing
with self.assertRaises(FileNotFoundError):
dref2.get()

# Test again with a trusting butler.
butler._datastore.trustGetRequest = True
exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
Expand All @@ -1611,6 +1624,12 @@ def testPruneDatasets(self) -> None:
self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)

# When trusting we can get a deferred dataset handle that is not
# known but does exist.
dref3 = butler.getDeferred(ref3)
metric3 = dref3.get()
self.assertEqual(metric3, metric)

# Check that per-ref query gives the same answer as many query.
for ref, exists in exists_many.items():
self.assertEqual(butler.exists(ref, full_check=True), exists)
Expand Down

0 comments on commit a2dbe3f

Please sign in to comment.