
Update pyproject.toml and remove setup.py #143

Merged on Dec 7, 2023 (30 commits)
Changes from 22 commits

Commits
45d45d7  Update pyproject.toml and remove setup.py (mavaylon1, Nov 27, 2023)
83a8d7c  Update __init__.py (mavaylon1, Nov 27, 2023)
0a8cfae  Delete versioneer.py (mavaylon1, Nov 27, 2023)
906f5ec  Update .gitignore (mavaylon1, Nov 27, 2023)
59b859e  Create .git_archival.txt (mavaylon1, Nov 27, 2023)
44f55cd  Update .gitattributes (mavaylon1, Nov 27, 2023)
4f8e76f  Create codespell.yml (mavaylon1, Nov 27, 2023)
68a5aec  Create ruff.yml (mavaylon1, Nov 27, 2023)
0a9e289  Update conf.py (mavaylon1, Nov 28, 2023)
8d618b8  Update conf.py (mavaylon1, Nov 28, 2023)
9d00e00  Update test_io_convert.py (mavaylon1, Nov 28, 2023)
50e4c60  Update CHANGELOG.md (mavaylon1, Nov 28, 2023)
ef71567  Update base_tests_zarrio.py (mavaylon1, Nov 28, 2023)
2595874  Update test_fsspec_streaming.py (mavaylon1, Nov 28, 2023)
2f09e7b  Update overview.rst (mavaylon1, Nov 28, 2023)
112a24b  Update storage.rst (mavaylon1, Nov 28, 2023)
f2e3e84  Update _version.py (mavaylon1, Nov 28, 2023)
e22b9b0  Update utils.py (mavaylon1, Nov 28, 2023)
b36725d  Update backend.py (mavaylon1, Nov 28, 2023)
87aa1a1  Update backend.py (mavaylon1, Nov 28, 2023)
f37b989  Update CHANGELOG.md (mavaylon1, Dec 5, 2023)
0498bc0  Update pyproject.toml (mavaylon1, Dec 5, 2023)
2165c60  Update pyproject.toml (rly, Dec 5, 2023)
778ac82  Update pyproject.toml (rly, Dec 5, 2023)
2683b57  Update pyproject.toml (mavaylon1, Dec 5, 2023)
596d463  Delete setup.cfg (mavaylon1, Dec 5, 2023)
df11057  Delete setup.py (mavaylon1, Dec 5, 2023)
0a5df47  Delete src/hdmf_zarr/_version.py (rly, Dec 5, 2023)
e88ebb6  Merge branch 'dev' into toml (rly, Dec 5, 2023)
246f105  Delete .github/workflows/run_flake8.yml (mavaylon1, Dec 7, 2023)
4 changes: 4 additions & 0 deletions .git_archival.txt
@@ -0,0 +1,4 @@
node: $Format:%H$
node-date: $Format:%cI$
describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$
ref-names: $Format:%D$
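
For context: the `export-subst` attribute (set in `.gitattributes` below) makes `git archive` replace each `$Format:...$` placeholder with concrete values, which lets hatch-vcs recover a version from a source tarball that has no `.git` directory. A minimal sketch of that recovery, assuming a substituted file and a hypothetical describe value:

# Illustrative sketch: read a substituted .git_archival.txt
archival = {}
with open(".git_archival.txt") as f:
    for line in f:
        key, _, value = line.partition(":")
        archival[key.strip()] = value.strip()

# After `git archive`, describe-name could be e.g. "0.4.0-12-g45d45d7"
print(archival["describe-name"])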
2 changes: 1 addition & 1 deletion .gitattributes
@@ -1 +1 @@
src/hdmf_zarr/_version.py export-subst
.git_archival.txt export-subst
14 changes: 14 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,14 @@
name: Codespell
on:
pull_request:
workflow_dispatch:

jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Codespell
uses: codespell-project/actions-codespell@v2
11 changes: 11 additions & 0 deletions .github/workflows/ruff.yml
@@ -0,0 +1,11 @@
name: Ruff
on: pull_request

jobs:
ruff:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Run ruff
uses: chartboost/ruff-action@v1
3 changes: 3 additions & 0 deletions .gitignore
@@ -142,3 +142,6 @@ dmypy.json

# DS_Store
.DS_Store

# Version
_version.py
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -4,6 +4,7 @@

### Enhancements
* Fix linking for FSSpec and support passing of `storage_options` required for reading data from S3. @alejoe91 [#138](https://github.com/hdmf-dev/hdmf-zarr/pull/138)
* Updated versioning to hatch-vcs and removed setup.py as part of the full transition to `pyproject.toml`. @mavaylon1 [#143](https://github.com/hdmf-dev/hdmf-zarr/pull/143)

## 0.4.0 (October 3, 2023)

@@ -64,7 +65,7 @@
links/reference when moving Zarr files @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Fixed bugs in requirements defined in setup.py @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Fixed bug regarding Sphinx external links @mavaylon1 [#53](https://github.com/hdmf-dev/hdmf-zarr/pull/53)
* Updated gallery tests to use test_gallery.py and necessary package dependcies
* Updated gallery tests to use test_gallery.py and necessary package dependencies
@mavaylon1 [#53](https://github.com/hdmf-dev/hdmf-zarr/pull/53)
* Updated dataset used in conversion tutorial, which caused warnings
@oruebel [#56](https://github.com/hdmf-dev/hdmf-zarr/pull/56)
6 changes: 3 additions & 3 deletions docs/source/conf.py
@@ -30,8 +30,8 @@
# This lets us ensure that the source package is imported, and that its
# version is used.
sys.path.insert(0, os.path.join(project_root, 'src'))
from hdmf_zarr._version import get_versions

import hdmf_zarr

# -- Autodoc configuration -----------------------------------------------------
autoclass_content = 'both'
@@ -46,9 +46,9 @@
author = 'Oliver Ruebel, Matthew Avaylon'

# The short X.Y version.
version = '{}'.format(get_versions()['version'])
version = hdmf_zarr.__version__
# The full version, including alpha/beta/rc tags.
release = '{}'.format(get_versions()['version'])
release = hdmf_zarr.__version__

# -- General configuration ---------------------------------------------------

2 changes: 1 addition & 1 deletion docs/source/overview.rst
@@ -24,7 +24,7 @@ Supported features
- Iterative data write using :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`
- Parallel write with :py:class:`~hdmf.data_utils.GenericDataChunkIterator` (since v0.4)
- Lazy load of datasets
- Lazy load of datasets containing object refernces (since v0.4)
- Lazy load of datasets containing object references (since v0.4)

Known Limitations
-----------------
2 changes: 1 addition & 1 deletion docs/source/storage.rst
@@ -174,7 +174,7 @@ as JSON. Each dict (i.e., element) in the list defines a link, with each dict co
pointing to an object within the same Zarr file, the value of source will be ``"."``. For external
links that point to an object in another Zarr file, the value of source will be the path to
the other Zarr file relative to the root path of the Zarr file containing the link.
* ``path`` : Path to the linked object within the Zarr file idenfied by the ``source`` key
* ``path`` : Path to the linked object within the Zarr file identified by the ``source`` key
* ``object_id``: Object id of the reference object. May be None in case the referenced object
does not have an assigned object_id (e.g., when we reference a dataset with a fixed
name but without an assigned ``data_type``, or ``neurodata_type`` in the case of NWB).
108 changes: 108 additions & 0 deletions pyproject.toml
@@ -1,3 +1,87 @@
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "hdmf_zarr"
authors = [
{ name="Oliver Ruebel", email="[email protected]" },
{ name="Matthew Avaylon", email="[email protected]" },
]
description = "A package defining a Zarr I/O backend for HDMF"
readme = "README.rst"
requires-python = ">=3.8"
license = {text = "BSD"}
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: BSD License",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Operating System :: Microsoft :: Windows",
"Operating System :: MacOS",
"Operating System :: Unix",
"Topic :: Scientific/Engineering :: Medical Science Apps."
]
dependencies = [
'hdmf>=3.9.0',
'zarr>=2.11.0',
'numpy>=1.24',
'numcodecs==0.11.0',
'pynwb>=2.5.0',
'threadpoolctl>=3.1.0',
]
dynamic = ["version"]

[project.optional-dependencies]
tqdm = ["tqdm>=4.41.0"]
fsspec = ["fsspec"]
s3fs = ["s3fs"]

[project.urls]
"Homepage" = "https://github.com/hdmf-dev/hdmf-zarr"
"Bug Tracker" = "https://github.com/hdmf-dev/hdmf-zarr/issues"

[tool.hatch.version]
source = "vcs"

[tool.hatch.build.hooks.vcs]
# this file is created/updated when the package is installed and used in
# src/hdmf_zarr/__init__.py to set `hdmf_zarr.__version__`
# this allows the version to be accessible from python
version-file = "src/hdmf_zarr/_version.py"

[tool.hatch.build.targets.sdist]
exclude = [".git_archival.txt"]

[tool.hatch.build.targets.wheel]
packages = ["src/hdmf_zarr"]

[tool.pytest.ini_options]
addopts = "--cov --cov-report html"

[tool.codespell]
skip = "htmlcov,.git,.mypy_cache,.pytest_cache,.coverage,*.pdf,*.svg,venvs,.tox,./docs/_build/*,*.ipynb"
ignore-words-list = "datas"

[tool.coverage.run]
branch = true
source = ["src/"]
omit = [
"src/hdmf_zarr/_due.py",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"@abstract"
]

[tool.black]
line-length = 120
target-version = ['py38']
@@ -25,3 +109,27 @@ force-exclude = '''
/docs/*
)\
'''

[tool.ruff]
select = ["E", "F", "T100", "T201", "T203"]
exclude = [
".git",
".tox",
"__pycache__",
"build/",
"dist/",
"docs/source/conf.py",
"src/hdmf_zarr/_due.py",
"docs/source/tutorials/",
"docs/_build/",
]
line-length = 120

[tool.ruff.per-file-ignores]
"docs/gallery/*" = ["E402", "T201"]
"src/*/__init__.py" = ["F401"]
"setup.py" = ["T201"]
"test_gallery.py" = ["T201"]

[tool.ruff.mccabe]
max-complexity = 17
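
With `dynamic = ["version"]` and the vcs hook above, the version string is computed from git metadata at build/install time and written to `src/hdmf_zarr/_version.py`. A quick sanity check after installing from a git checkout (the printed dev version below is hypothetical):

import hdmf_zarr
print(hdmf_zarr.__version__)  # e.g. "0.4.1.dev30+g246f105", derived from `git describe`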
12 changes: 9 additions & 3 deletions src/hdmf_zarr/__init__.py
@@ -2,9 +2,15 @@
from .utils import ZarrDataIO
from .nwb import NWBZarrIO

from ._version import get_versions # noqa: E402
__version__ = get_versions()['version']
del get_versions
try:
# see https://effigies.gitlab.io/posts/python-packaging-2023/
from ._version import __version__
except ImportError: # pragma: no cover
# this is a relatively slower method for getting the version string
from importlib.metadata import version # noqa: E402

__version__ = version("hdmf")
del version

# Duecredit definitions
from ._due import due, BibTeX # noqa: E402
2 changes: 1 addition & 1 deletion src/hdmf_zarr/_version.py
@@ -287,7 +287,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
# unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
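
For reference, the `TAG-NUM-gHEX` shape this regex expects, as a worked example with made-up values:

import re

git_describe = "0.4.0-12-g45d45d7"  # hypothetical `git describe` output
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
print(mo.groups())  # ('0.4.0', '12', '45d45d7'): tag, commits since tag, short hash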
10 changes: 5 additions & 5 deletions src/hdmf_zarr/backend.py
@@ -256,7 +256,7 @@ def write(self, **kwargs):
self.__cache_spec()

def __cache_spec(self):
"""Interanl function used to cache the spec in the current file"""
"""Internal function used to cache the spec in the current file"""
ref = self.__file.attrs.get(SPEC_LOC_ATTR)
spec_group = None
if ref is not None:
@@ -348,7 +348,7 @@ def get_written(self, builder, check_on_disk=False):
by this I/O backend
:type check_on_disk: bool
:return: True if the builder is found in self._written_builders using the builder ID, False otherwise. If
check_on_disk is enabled then the function cals get_builder_exists_on_disk in addtion to verify
check_on_disk is enabled then the function calls get_builder_exists_on_disk in addition to verify
that the builder has indeed been written to disk.
"""
written = self._written_builders.get_written(builder)
@@ -1166,7 +1166,7 @@ def __list_fill__(self, parent, name, data, options=None):  # noqa: C901
else:
try:
dset[:] = data # If data is an h5py.Dataset then this will copy the data
# For compound data types containing strings Zarr sometimes does not like wirting multiple values
# For compound data types containing strings Zarr sometimes does not like writing multiple values
# so try to write them one-at-a-time instead
except ValueError:
for i in range(len(data)):
@@ -1281,7 +1281,7 @@ def __read_links(self, zarr_obj, parent):
"""
Read the links associated with a zarr group
:param zarr_obj: The Zarr group we should read links from
:type zarr_obj: zarr.hiearchy.Group
:type zarr_obj: zarr.hierarchy.Group
:param parent: GroupBuilder with which the links need to be associated
:type parent: GroupBuilder
"""
@@ -1308,7 +1308,7 @@ def __read_dataset(self, zarr_obj, name):

if 'zarr_dtype' in zarr_obj.attrs:
zarr_dtype = zarr_obj.attrs['zarr_dtype']
elif hasattr(zarr_obj, 'dtype'): # Fallback for invalid files that are mssing zarr_type
elif hasattr(zarr_obj, 'dtype'): # Fallback for invalid files that are missing zarr_type
zarr_dtype = zarr_obj.dtype
warnings.warn(
"Inferred dtype from zarr type. Dataset missing zarr_dtype: " + str(name) + " " + str(zarr_obj)
10 changes: 5 additions & 5 deletions src/hdmf_zarr/utils.py
@@ -27,7 +27,7 @@
# Necessary definitions to avoid parallelization bugs, Inherited from SpikeInterface experience
# see
# https://stackoverflow.com/questions/10117073/how-to-use-initializer-to-set-up-my-multiprocess-pool
# the tricks is : theses 2 variables are global per worker
# the trick is: these 2 variables are global per worker
# so they are not shared across processes
global _worker_context
global _operation_to_run
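
The per-worker-global pattern described in this comment, as a standalone sketch (the initializer and worker functions here are illustrative, not the module's actual ones): globals assigned in a pool initializer are set once per worker process, so each worker holds its own copy.

from concurrent.futures import ProcessPoolExecutor

def _init_worker(operation, context):
    # Runs once in each worker process; these globals are per-process, not shared
    global _operation_to_run, _worker_context
    _operation_to_run = operation
    _worker_context = context

def _run(value):
    return _operation_to_run(value, _worker_context)

if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=2, initializer=_init_worker, initargs=(pow, 2)) as executor:
        print(list(executor.map(_run, [1, 2, 3])))  # [1, 4, 9]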
@@ -36,7 +36,7 @@
class ZarrIODataChunkIteratorQueue(deque):
"""
Helper class used by ZarrIO to manage the write for DataChunkIterators
Each queue element must be a tupple of two elements:
Each queue element must be a tuple of two elements:
1) the dataset to write to and 2) the AbstractDataChunkIterator with the data
:param number_of_jobs: The number of jobs used to write the datasets. The default is 1.
:type number_of_jobs: integer
@@ -192,7 +192,7 @@ def exhaust_queue(self):

results = tqdm(iterable=results, **progress_bar_options)

# exector map must be iterated to deploy commands over jobs
# executor map must be iterated to deploy commands over jobs
for size_in_MB, result in zip(size_in_MB_per_iteration, results):
results.update(n=int(size_in_MB)) # int() to round down for better display
except Exception as exception: # pragma: no cover
Expand All @@ -203,11 +203,11 @@ def exhaust_queue(self):
),
stacklevel=2,
)
# exector map must be iterated to deploy commands over jobs
# executor map must be iterated to deploy commands over jobs
for result in results:
pass
else:
# exector map must be iterated to deploy commands over jobs
# executor map must be iterated to deploy commands over jobs
for result in results:
pass

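
For orientation, a minimal usage sketch of this queue against a plain Zarr array, assuming the `append(dataset, data)` signature defined in this module:

import numpy as np
import zarr
from hdmf.data_utils import DataChunkIterator
from hdmf_zarr.utils import ZarrIODataChunkIteratorQueue

dset = zarr.zeros(shape=(100,), chunks=(10,), dtype="i8")  # target dataset
chunk_iter = DataChunkIterator(data=np.arange(100), buffer_size=10)

queue = ZarrIODataChunkIteratorQueue(number_of_jobs=1)
queue.append(dataset=dset, data=chunk_iter)  # one (dataset, iterator) element
queue.exhaust_queue()  # drain the queue, writing every buffered chunk to dset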
4 changes: 2 additions & 2 deletions tests/unit/base_tests_zarrio.py
@@ -594,7 +594,7 @@ def test_write_attribute_write_unsupported_list_of_types(self):

def test_write_attributes_write_list_of_bytes(self):
"""
Test writing of lists of bytes. Bytes are not JSON serializable and therefore cover a differnt code path.
Test writing of lists of bytes. Bytes are not JSON serializable and therefore cover a different code path.
Note, bytes are here encoded as strings, so the return value does not match exactly but the data type changes.
"""
val = self.__write_attribute_test_helper('attr', [b'a', b'b', b'c', b'd'], assert_value=False)
@@ -1301,7 +1301,7 @@ def test_append_data(self):
# create a foo with link to existing dataset my_data, add the foo to new foobucket
# this should make a soft link within the exported file
# TODO Assigning my_data is the problem, which in turn causes the export to fail because the Zarr
# DataType is not being understood. This is where the External link should be cerated instead?
# DataType is not being understood. This is where the External link should be created instead?
foo2 = Foo('foo2', read_foofile.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14)
foobucket2 = FooBucket('bucket2', [foo2])
read_foofile.add_bucket(foobucket2)
2 changes: 1 addition & 1 deletion tests/unit/test_fsspec_streaming.py
@@ -8,7 +8,7 @@
class TestFSSpecStreaming(unittest.TestCase):
@unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
def test_fsspec_streaming(self):
# PLACEHOLDER test file from Allen Insitute for Neural Dynamics
# PLACEHOLDER test file from Allen Institute for Neural Dynamics
# TODO: store a small test file and use it to speed up testing
remote_path = (
"s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/"
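
The same streaming read outside the test harness, as a hedged sketch: the S3 path is a placeholder, and `storage_options` is the pass-through added in #138 (`anon=True` assumes a public bucket and an installed s3fs).

from hdmf_zarr.nwb import NWBZarrIO

remote_path = "s3://some-public-bucket/example.nwb.zarr"  # placeholder path
with NWBZarrIO(remote_path, mode="r", storage_options=dict(anon=True)) as io:
    nwbfile = io.read()
    print(nwbfile.identifier)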
3 changes: 0 additions & 3 deletions tests/unit/test_io_convert.py
@@ -447,9 +447,6 @@ class TestHDF5ToZarrDynamicTableC0(MixinTestDynamicTableContainer,
IGNORE_STRING_TO_BYTE = False
TABLE_TYPE = 0

def test_simple(self, write_path=None, export_path=None):
print(write_path, export_path)


class TestZarrToHDF5DynamicTableC0(MixinTestDynamicTableContainer,
MixinTestZarrToHDF5,
Expand Down