diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 942a726d2f..2afb5d14fa 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11", "3.12"] steps: - uses: actions/checkout@v3 @@ -110,7 +110,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.11" - name: Install dependencies run: | diff --git a/pyproject.toml b/pyproject.toml index 11c075bd79..4486175d51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lsst-daf-butler" -requires-python = ">=3.10.0" +requires-python = ">=3.11.0" description = "An abstraction layer for reading and writing astronomical data to datastores." license = {text = "BSD 3-Clause License"} readme = "README.md" @@ -16,7 +16,7 @@ classifiers = [ "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Astronomy", ] diff --git a/python/lsst/daf/butler/_storage_class.py b/python/lsst/daf/butler/_storage_class.py index f66241e13f..5f54889094 100644 --- a/python/lsst/daf/butler/_storage_class.py +++ b/python/lsst/daf/butler/_storage_class.py @@ -724,11 +724,11 @@ def addFromConfig(self, config: StorageClassConfig | Config | str) -> None: # components or parents before their classes are defined # we have a helper function that we can call recursively # to extract definitions from the configuration. 
- def processStorageClass(name: str, sconfig: StorageClassConfig, msg: str = "") -> None: + def processStorageClass(name: str, _sconfig: StorageClassConfig, msg: str = "") -> None: # Maybe we've already processed this through recursion - if name not in sconfig: + if name not in _sconfig: return - info = sconfig.pop(name) + info = _sconfig.pop(name) # Always create the storage class so we can ensure that # we are not trying to overwrite with a different definition @@ -757,9 +757,31 @@ def processStorageClass(name: str, sconfig: StorageClassConfig, msg: str = "") - baseClass = None if "inheritsFrom" in info: baseName = info["inheritsFrom"] - if baseName not in self: + + # The inheritsFrom feature requires that the storage class + # being inherited from is itself a subclass of StorageClass + # that was created with makeNewStorageClass. If it was made + # and registered with a simple StorageClass constructor it + # cannot be used here and we try to recreate it. + if baseName in self: + baseClass = type(self.getStorageClass(baseName)) + if baseClass is StorageClass: + log.warning( + "Storage class %s is requested to inherit from %s but that storage class " + "has not been defined to be a subclass of StorageClass and so can not " + "be used. Attempting to recreate parent class from current configuration.", + name, + baseName, + ) + processStorageClass(baseName, sconfig, msg) + else: processStorageClass(baseName, sconfig, msg) baseClass = type(self.getStorageClass(baseName)) + if baseClass is StorageClass: + raise TypeError( + f"Configuration for storage class {name} requests to inherit from " + f"storage class {baseName} but that class is not defined correctly." 
+ ) newStorageClassType = self.makeNewStorageClass(name, baseClass, **storageClassKwargs) newStorageClass = newStorageClassType() @@ -942,6 +964,9 @@ def registerStorageClass(self, storageClass: StorageClass, msg: str | None = Non f"New definition for StorageClass {storageClass.name} ({storageClass!r}) " f"differs from current definition ({existing!r}){errmsg}" ) + if type(existing) is StorageClass and type(storageClass) is not StorageClass: + # Replace generic with specialist subclass equivalent. + self._storageClasses[storageClass.name] = storageClass else: self._storageClasses[storageClass.name] = storageClass diff --git a/python/lsst/daf/butler/cli/cliLog.py b/python/lsst/daf/butler/cli/cliLog.py index 45ac6e4b46..720c19acbe 100644 --- a/python/lsst/daf/butler/cli/cliLog.py +++ b/python/lsst/daf/butler/cli/cliLog.py @@ -56,7 +56,7 @@ class PrecisionLogFormatter(logging.Formatter): def formatTime(self, record: logging.LogRecord, datefmt: str | None = None) -> str: """Format the time as an aware datetime.""" - ct: datetime.datetime = self.converter(record.created, tz=datetime.timezone.utc) # type: ignore + ct: datetime.datetime = self.converter(record.created, tz=datetime.UTC) # type: ignore if self.use_local: ct = ct.astimezone() if datefmt: diff --git a/python/lsst/daf/butler/datastore/cache_manager.py b/python/lsst/daf/butler/datastore/cache_manager.py index bf0af433d5..8196cf2a45 100644 --- a/python/lsst/daf/butler/datastore/cache_manager.py +++ b/python/lsst/daf/butler/datastore/cache_manager.py @@ -167,7 +167,7 @@ def from_file(cls, file: ResourcePath, root: ResourcePath) -> CacheEntry: size=stat.st_size, ref=id_, component=component, - ctime=datetime.datetime.utcfromtimestamp(stat.st_ctime), + ctime=datetime.datetime.fromtimestamp(stat.st_ctime, datetime.UTC), ) @@ -239,7 +239,7 @@ def items(self) -> ItemsView[str, CacheEntry]: name="marker", size=0, ref=uuid.UUID("{00000000-0000-0000-0000-000000000000}"), - ctime=datetime.datetime.utcfromtimestamp(0), + 
ctime=datetime.datetime.fromtimestamp(0, datetime.UTC), ) def pop(self, key: str, default: CacheEntry | None = __marker) -> CacheEntry | None: @@ -967,7 +967,7 @@ def _expire_cache(self) -> None: return if self._expiration_mode == "age": - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.UTC) for key in self._sort_cache(): delta = now - self._cache_entries[key].ctime if delta.seconds > self._expiration_threshold: diff --git a/python/lsst/daf/butler/datastore/file_templates.py b/python/lsst/daf/butler/datastore/file_templates.py index 7e6e09d650..a48547d225 100644 --- a/python/lsst/daf/butler/datastore/file_templates.py +++ b/python/lsst/daf/butler/datastore/file_templates.py @@ -201,7 +201,7 @@ def validateTemplates( log.critical("Template failure with key '%s': %s", matchKey, e) if logFailures and unmatchedKeys: - log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys])) + log.warning("Unchecked keys: '%s'", ", ".join([str(k) for k in unmatchedKeys])) if failed: if len(failed) == 1: diff --git a/python/lsst/daf/butler/datastores/chainedDatastore.py b/python/lsst/daf/butler/datastores/chainedDatastore.py index 28986d7a23..fb19b52227 100644 --- a/python/lsst/daf/butler/datastores/chainedDatastore.py +++ b/python/lsst/daf/butler/datastores/chainedDatastore.py @@ -461,7 +461,7 @@ def put_new(self, inMemoryDataset: Any, ref: DatasetRef) -> Mapping[str, Dataset npermanent = 0 nephemeral = 0 stored_refs: dict[str, DatasetRef] = {} - for datastore, constraints in zip(self.datastores, self.datastoreConstraints): + for datastore, constraints in zip(self.datastores, self.datastoreConstraints, strict=True): if ( constraints is not None and not constraints.isAcceptable(ref) ) or not datastore.constraints.isAcceptable(ref): diff --git a/python/lsst/daf/butler/logging.py b/python/lsst/daf/butler/logging.py index 957fe7970c..3a5ea11a6a 100644 --- a/python/lsst/daf/butler/logging.py +++ b/python/lsst/daf/butler/logging.py @@ -111,6 
+111,11 @@ def MDCRemove(cls, key: str) -> None: """ cls._MDC.pop(key, None) + @classmethod + def clear_mdc(cls) -> None: + """Clear all MDC entries.""" + cls._MDC.clear() + @classmethod @contextmanager def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]: @@ -231,7 +236,7 @@ def from_record(cls, record: LogRecord) -> "ButlerLogRecord": # Always use UTC because in distributed systems we can't be sure # what timezone localtime is and it's easier to compare logs if # every system is using the same time. - record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc) + record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC) # Sometimes exception information is included so must be # extracted. diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py index 2a015b502c..bdb17496c8 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py @@ -32,8 +32,8 @@ __all__ = ("ByDimensionsDatasetRecordStorage",) +import datetime from collections.abc import Callable, Iterable, Iterator, Sequence, Set -from datetime import datetime from typing import TYPE_CHECKING import astropy.time @@ -586,13 +586,13 @@ def insert( # Current timestamp, type depends on schema version. Use microsecond # precision for astropy time to keep things consistent with # TIMESTAMP(6) SQL type. - timestamp: datetime | astropy.time.Time + timestamp: datetime.datetime | astropy.time.Time if self._use_astropy: - # Astropy `now()` precision should be the same as `utcnow()` which + # Astropy `now()` precision should be the same as `datetime.now()` which # should mean microsecond. 
timestamp = astropy.time.Time.now() else: - timestamp = datetime.utcnow() + timestamp = datetime.datetime.now(datetime.UTC) # Iterate over data IDs, transforming a possibly-single-pass iterable # into a list. @@ -647,11 +647,11 @@ def import_( # Current timestamp, type depends on schema version. if self._use_astropy: - # Astropy `now()` precision should be the same as `utcnow()` which + # Astropy `now()` precision should be the same as `datetime.now()` which # should mean microsecond. timestamp = sqlalchemy.sql.literal(astropy.time.Time.now(), type_=ddl.AstropyTimeNsecTai) else: - timestamp = sqlalchemy.sql.literal(datetime.utcnow()) + timestamp = sqlalchemy.sql.literal(datetime.datetime.now(datetime.UTC)) # Iterate over data IDs, transforming a possibly-single-pass iterable # into a list. diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py index 68acd50501..d84ff3c3ad 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py @@ -78,7 +78,7 @@ (?P (?P-?(\d+(\.\d*)|(\.\d+))) # floating point number | - (?P\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t) + (?P\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t) [no timezone] | (?P[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits | @@ -108,6 +108,11 @@ def _parseTimeString(time_str): ValueError Raised if input string has unexpected format """ + # Check for time zone. Python datetime objects can be timezone-aware + # and if one has been stringified then there will be a +00:00 on the end. + # Special case UTC. Fail for other timezones. 
+ time_str = time_str.replace("+00:00", "") + match = _re_time_str.match(time_str) if not match: raise ValueError(f'Time string "{time_str}" does not match known formats') diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index fbe074c872..487dd26c26 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -30,6 +30,7 @@ __all__ = ["RegistryTests"] +import datetime import itertools import logging import os @@ -39,7 +40,7 @@ from abc import ABC, abstractmethod from collections import defaultdict, namedtuple from collections.abc import Iterator -from datetime import datetime, timedelta +from datetime import timedelta from typing import TYPE_CHECKING import astropy.time @@ -2481,9 +2482,9 @@ def testSkipCalibs(self): def testIngestTimeQuery(self): registry = self.makeRegistry() self.loadData(registry, "base.yaml") - dt0 = datetime.utcnow() + dt0 = datetime.datetime.now(datetime.UTC) self.loadData(registry, "datasets.yaml") - dt1 = datetime.utcnow() + dt1 = datetime.datetime.now(datetime.UTC) datasets = list(registry.queryDatasets(..., collections=...)) len0 = len(datasets) diff --git a/python/lsst/daf/butler/tests/_examplePythonTypes.py b/python/lsst/daf/butler/tests/_examplePythonTypes.py index 2c86b12499..9631c5c9b4 100644 --- a/python/lsst/daf/butler/tests/_examplePythonTypes.py +++ b/python/lsst/daf/butler/tests/_examplePythonTypes.py @@ -77,6 +77,9 @@ def registerMetricsExample(butler: Butler) -> None: can be retrieved as dataset components. ``StructuredDataNoComponents`` A monolithic write of a `MetricsExample`. + + These definitions must match the equivalent definitions in the test YAML + files. 
""" yamlDict = _addFullStorageClass( butler, @@ -101,6 +104,7 @@ def registerMetricsExample(butler: Butler) -> None: pytype=MetricsExample, parameters={"slice"}, delegate="lsst.daf.butler.tests.MetricsDelegate", + converters={"dict": "lsst.daf.butler.tests.MetricsExample.makeFromDict"}, ) _addFullStorageClass( @@ -117,9 +121,7 @@ def registerMetricsExample(butler: Butler) -> None: ) -def _addFullStorageClass( - butler: Butler, name: str, formatter: str, *args: Any, **kwargs: Any -) -> StorageClass: +def _addFullStorageClass(butler: Butler, name: str, formatter: str, **kwargs: Any) -> StorageClass: """Create a storage class-formatter pair in a repository if it does not already exist. @@ -132,7 +134,6 @@ def _addFullStorageClass( formatter : `str` The formatter to use with the storage class. Ignored if ``butler`` does not use formatters. - *args **kwargs Arguments, other than ``name``, to the `~lsst.daf.butler.StorageClass` constructor. @@ -145,7 +146,11 @@ class : `lsst.daf.butler.StorageClass` """ storageRegistry = butler._datastore.storageClassFactory - storage = StorageClass(name, *args, **kwargs) + # Use the special constructor to allow a subclass of storage class + # to be created. This allows other test storage classes to inherit from + # this one. + storage_type = storageRegistry.makeNewStorageClass(name, None, **kwargs) + storage = storage_type() try: storageRegistry.registerStorageClass(storage) except ValueError: diff --git a/tests/test_butler.py b/tests/test_butler.py index fcf9f85fca..17d6744c51 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -317,59 +317,68 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Dir butler, ref, ("summary", "data", "output"), metric, collections=this_run ) + primary_uri, secondary_uris = butler.getURIs(ref) + n_uris = len(secondary_uris) + if primary_uri: + n_uris += 1 + # Can the artifacts themselves be retrieved? 
if not butler._datastore.isEphemeral: - root_uri = ResourcePath(self.root) - - for preserve_path in (True, False): - destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/") - # Use copy so that we can test that overwrite - # protection works (using "auto" for File URIs would - # use hard links and subsequent transfer would work - # because it knows they are the same file). - transferred = butler.retrieveArtifacts( - [ref], destination, preserve_path=preserve_path, transfer="copy" - ) - self.assertGreater(len(transferred), 0) - artifacts = list(ResourcePath.findFileResources([destination])) - self.assertEqual(set(transferred), set(artifacts)) - - for artifact in transferred: - path_in_destination = artifact.relative_to(destination) - self.assertIsNotNone(path_in_destination) - assert path_in_destination is not None - - # when path is not preserved there should not be - # any path separators. - num_seps = path_in_destination.count("/") - if preserve_path: - self.assertGreater(num_seps, 0) - else: - self.assertEqual(num_seps, 0) - - primary_uri, secondary_uris = butler.getURIs(ref) - n_uris = len(secondary_uris) - if primary_uri: - n_uris += 1 - self.assertEqual( - len(artifacts), - n_uris, - "Comparing expected artifacts vs actual:" - f" {artifacts} vs {primary_uri} and {secondary_uris}", - ) + # Create a temporary directory to hold the retrieved + # artifacts. + with tempfile.TemporaryDirectory( + prefix="butler-artifacts-", ignore_cleanup_errors=True + ) as artifact_root: + root_uri = ResourcePath(artifact_root, forceDirectory=True) + + for preserve_path in (True, False): + destination = root_uri.join(f"{preserve_path}_{counter}/") + log = logging.getLogger("lsst.x") + log.warning("Using destination %s for args %s", destination, args) + # Use copy so that we can test that overwrite + # protection works (using "auto" for File URIs + # would use hard links and subsequent transfer + # would work because it knows they are the same + # file). 
+ transferred = butler.retrieveArtifacts( + [ref], destination, preserve_path=preserve_path, transfer="copy" + ) + self.assertGreater(len(transferred), 0) + artifacts = list(ResourcePath.findFileResources([destination])) + self.assertEqual(set(transferred), set(artifacts)) + + for artifact in transferred: + path_in_destination = artifact.relative_to(destination) + self.assertIsNotNone(path_in_destination) + assert path_in_destination is not None + + # When path is not preserved there should not + # be any path separators. + num_seps = path_in_destination.count("/") + if preserve_path: + self.assertGreater(num_seps, 0) + else: + self.assertEqual(num_seps, 0) + + self.assertEqual( + len(artifacts), + n_uris, + "Comparing expected artifacts vs actual:" + f" {artifacts} vs {primary_uri} and {secondary_uris}", + ) - if preserve_path: - # No need to run these twice - with self.assertRaises(ValueError): - butler.retrieveArtifacts([ref], destination, transfer="move") + if preserve_path: + # No need to run these twice + with self.assertRaises(ValueError): + butler.retrieveArtifacts([ref], destination, transfer="move") - with self.assertRaises(FileExistsError): - butler.retrieveArtifacts([ref], destination) + with self.assertRaises(FileExistsError): + butler.retrieveArtifacts([ref], destination) - transferred_again = butler.retrieveArtifacts( - [ref], destination, preserve_path=preserve_path, overwrite=True - ) - self.assertEqual(set(transferred_again), set(transferred)) + transferred_again = butler.retrieveArtifacts( + [ref], destination, preserve_path=preserve_path, overwrite=True + ) + self.assertEqual(set(transferred_again), set(transferred)) # Now remove the dataset completely. butler.pruneDatasets([ref], purge=True, unstore=True) @@ -2300,7 +2309,7 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St self.target_butler.registry.refresh() # Now transfer them to the second butler, including dimensions. 
- with self.assertLogs(level=logging.DEBUG) as log_cm: + with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: transferred = self.target_butler.transfer_from( self.source_butler, source_refs, diff --git a/tests/test_cliCmdConfigValidate.py b/tests/test_cliCmdConfigValidate.py index 98181165bf..5520a748b4 100644 --- a/tests/test_cliCmdConfigValidate.py +++ b/tests/test_cliCmdConfigValidate.py @@ -64,7 +64,7 @@ def testConfigValidate(self): # verify the just-created repo validates without error result = self.runner.invoke(butler.cli, ["config-validate", "here"]) self.assertEqual(result.exit_code, 0, result.stdout) - self.assertEqual(result.stdout, "No problems encountered with configuration.\n") + self.assertIn("No problems encountered with configuration.", result.stdout) def testConfigValidate_ignore(self): """Test the ignore flag""" @@ -84,7 +84,7 @@ def testConfigValidate_ignore(self): ], ) self.assertEqual(result.exit_code, 0, result.stdout) - self.assertEqual(result.stdout, "No problems encountered with configuration.\n") + self.assertIn("No problems encountered with configuration.", result.stdout) if __name__ == "__main__": diff --git a/tests/test_logging.py b/tests/test_logging.py index 89f8373f18..5693ca0445 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -55,6 +55,7 @@ def tearDown(self): if self.handler and self.log: self.log.removeHandler(self.handler) ButlerMDC.restore_log_record_factory() + ButlerMDC.clear_mdc() def testRecordCapture(self): """Test basic log capture and serialization."""