Merge branch 'dev' into fix/zarr_str_export

hdmf-dev · Aug 19, 2024 · 2b90d21 · 2b90d21
2 parents 6f759a6 + 316ec4b
commit 2b90d21
Show file tree

Hide file tree

Showing 9 changed files with 41 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### Enhancements
 - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157)
+- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
+- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
 
 ### Bug fixes
 - Fixed bug when converting string datasets from Zarr to HDF5. @oruebel [#1171](https://github.com/hdmf-dev/hdmf/pull/1171)
@@ -14,6 +16,8 @@
 - Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the
 dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute
 is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081)
+- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102)
+- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103)
 
 ## HDMF 3.14.2 (July 7, 2024)
 

diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py
@@ -501,7 +501,7 @@ def __init__(self, **kwargs):
         # Check for possible collision with other parameters
         if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data:
             self.__link_data = False
-            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2)
+            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3)
         # Call the super constructor and consume the data parameter
         super().__init__(**kwargs)
         # Construct the dict with the io args, ignoring all options that were set to None
@@ -525,7 +525,7 @@ def __init__(self, **kwargs):
                 self.__iosettings.pop('compression', None)
                 if 'compression_opts' in self.__iosettings:
                     warnings.warn('Compression disabled by compression=False setting. ' +
-                                  'compression_opts parameter will, therefore, be ignored.', stacklevel=2)
+                                  'compression_opts parameter will, therefore, be ignored.', stacklevel=3)
                     self.__iosettings.pop('compression_opts', None)
         # Validate the compression options used
         self._check_compression_options()
@@ -540,7 +540,7 @@ def __init__(self, **kwargs):
         if isinstance(self.data, Dataset):
             for k in self.__iosettings.keys():
                 warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k,
-                              stacklevel=2)
+                              stacklevel=3)
 
         self.__dataset = None
 
@@ -618,7 +618,7 @@ def _check_compression_options(self):
             if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
                 warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
                               "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
-                              "the generated HDF5 files.", stacklevel=3)
+                              "the generated HDF5 files.", stacklevel=4)
 
     @staticmethod
     def filter_available(filter, allow_plugin_filters):

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
@@ -344,7 +344,7 @@ def copy_file(self, **kwargs):
         warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of "
                       "HDMF. Please use the export method or h5py.File.copy method instead.",
                       category=DeprecationWarning,
-                      stacklevel=2)
+                      stacklevel=3)
 
         source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename',
                                                                                             'dest_filename',

diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py
@@ -628,7 +628,7 @@ def add_ref(self, **kwargs):
             if entity_uri is not None:
                 entity_uri = entity.entity_uri
                 msg = 'This entity already exists. Ignoring new entity uri'
-                warn(msg, stacklevel=2)
+                warn(msg, stacklevel=3)
 
         #################
         # Validate Object

diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py
@@ -717,7 +717,7 @@ def add_row(self, **kwargs):
                     warn(("Data has elements with different lengths and therefore cannot be coerced into an "
                           "N-dimensional array. Use the 'index' argument when creating a column to add rows "
                           "with different lengths."),
-                         stacklevel=2)
+                         stacklevel=3)
 
     def __eq__(self, other):
         """Compare if the two DynamicTables contain the same data.
@@ -776,7 +776,7 @@ def add_column(self, **kwargs):  # noqa: C901
 
         if isinstance(index, VectorIndex):
             warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be "
-                 "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2)
+                 "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=3)
 
         if name in self.__colids:  # column has already been added
             msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name)
@@ -793,7 +793,7 @@ def add_column(self, **kwargs):  # noqa: C901
                        "Please ensure the new column complies with the spec. "
                        "This will raise an error in a future version of HDMF."
                        % (name, self.__class__.__name__, spec_table))
-                warn(msg, stacklevel=2)
+                warn(msg, stacklevel=3)
 
             index_bool = index or not isinstance(index, bool)
             spec_index = self.__uninit_cols[name].get('index', False)
@@ -803,7 +803,7 @@ def add_column(self, **kwargs):  # noqa: C901
                        "Please ensure the new column complies with the spec. "
                        "This will raise an error in a future version of HDMF."
                        % (name, self.__class__.__name__, spec_index))
-                warn(msg, stacklevel=2)
+                warn(msg, stacklevel=3)
 
             spec_col_cls = self.__uninit_cols[name].get('class', VectorData)
             if col_cls != spec_col_cls:
@@ -841,7 +841,7 @@ def add_column(self, **kwargs):  # noqa: C901
                 warn(("Data has elements with different lengths and therefore cannot be coerced into an "
                       "N-dimensional array. Use the 'index' argument when adding a column of data with "
                       "different lengths."),
-                     stacklevel=2)
+                     stacklevel=3)
 
             # Check that we are asked to create an index
             if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0:

diff --git a/src/hdmf/container.py b/src/hdmf/container.py
@@ -894,7 +894,7 @@ def set_dataio(self, **kwargs):
         warn(
             "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.",
             DeprecationWarning,
-            stacklevel=2,
+            stacklevel=3,
         )
         dataio = getargs('dataio', kwargs)
         dataio.data = self.__data
@@ -1142,7 +1142,9 @@ def _func(self, **kwargs):
                     # still need to mark self as modified
                     self.set_modified()
                 if tmp.name in d:
-                    msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
+                    msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in "
+                           f"{cls} '{self.name}'. {d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} "
+                           f"already exists in '{attr_name}' and has the same name.")
                     raise ValueError(msg)
                 d[tmp.name] = tmp
             return container

diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py
@@ -50,13 +50,13 @@ def __init__(self, **kwargs):
             self['full_name'] = full_name
         if version == str(SpecNamespace.UNVERSIONED):
             # the unversioned version may be written to file as a string and read from file as a string
-            warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2)
+            warn(f"Loaded namespace '{name}' is unversioned. Please notify the extension author.")
             version = SpecNamespace.UNVERSIONED
         if version is None:
             # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to
             # be able to read older files with extensions that are missing the version key.
-            warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. "
-                  "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2)
+            warn(f"Loaded namespace '{name}' is missing the required key 'version'. Version will be set to "
+                 f"'{SpecNamespace.UNVERSIONED}'. Please notify the extension author.")
             version = SpecNamespace.UNVERSIONED
         self['version'] = version
         if date is not None:
@@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True):
         return included_types
 
     def __register_type(self, ndt, inc_ns, catalog, registered_types):
-        spec = inc_ns.get_spec(ndt)
-        spec_file = inc_ns.catalog.get_spec_source_file(ndt)
-        self.__register_dependent_types(spec, inc_ns, catalog, registered_types)
-        if isinstance(spec, DatasetSpec):
-            built_spec = self.dataset_spec_cls.build_spec(spec)
+        if ndt in registered_types:
+            # already registered
+            pass
         else:
-            built_spec = self.group_spec_cls.build_spec(spec)
-        registered_types.add(ndt)
-        catalog.register_spec(built_spec, spec_file)
+            spec = inc_ns.get_spec(ndt)
+            spec_file = inc_ns.catalog.get_spec_source_file(ndt)
+            self.__register_dependent_types(spec, inc_ns, catalog, registered_types)
+            if isinstance(spec, DatasetSpec):
+                built_spec = self.dataset_spec_cls.build_spec(spec)
+            else:
+                built_spec = self.group_spec_cls.build_spec(spec)
+            registered_types.add(ndt)
+            catalog.register_spec(built_spec, spec_file)
 
     def __register_dependent_types(self, spec, inc_ns, catalog, registered_types):
         """Ensure that classes for all types used by this type are registered
@@ -529,7 +533,7 @@ def load_namespaces(self, **kwargs):
                 if ns['version'] != self.__namespaces.get(ns['name'])['version']:
                     # warn if the cached namespace differs from the already loaded namespace
                     warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
-                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2)
+                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
             else:
                 to_load.append(ns)
         # now load specs into namespace

diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py
@@ -1,7 +1,6 @@
 import re
 from abc import ABCMeta
 from collections import OrderedDict
-from copy import deepcopy
 from warnings import warn
 
 from ..utils import docval, getargs, popargs, get_docval
@@ -84,7 +83,7 @@ class ConstructableDict(dict, metaclass=ABCMeta):
     def build_const_args(cls, spec_dict):
         ''' Build constructor arguments for this ConstructableDict class from a dictionary '''
         # main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file
-        return deepcopy(spec_dict)
+        return spec_dict.copy()
 
     @classmethod
     def build_spec(cls, spec_dict):
@@ -322,7 +321,7 @@ def __init__(self, **kwargs):
         default_name = getargs('default_name', kwargs)
         if default_name:
             if name is not None:
-                warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2)
+                warn("found 'default_name' with 'name' - ignoring 'default_name'")
             else:
                 self['default_name'] = default_name
         self.__attributes = dict()

diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py
@@ -198,7 +198,10 @@ def test_add_single_dup(self):
         """Test that adding a container to the attribute dict correctly adds the container."""
         obj1 = Container('obj1')
         foo = Foo(obj1)
-        msg = "'obj1' already exists in Foo 'Foo'"
+        msg = (f"Cannot add <class 'hdmf.container.Container'> 'obj1' at 0x{id(obj1)} to dict attribute "
+               "'containers' in <class 'tests.unit.test_multicontainerinterface.Foo'> 'Foo'. "
+               f"<class 'hdmf.container.Container'> 'obj1' at 0x{id(obj1)} already exists in 'containers' "
+               "and has the same name.")
         with self.assertRaisesWith(ValueError, msg):
             foo.add_container(obj1)