Skip to content

Commit

Permalink
Fix conversion of Zarr string dataset to HDF5
Browse files Browse the repository at this point in the history
  • Loading branch information
oruebel committed Aug 15, 2024
1 parent 49a60df commit db51abd
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
6 changes: 6 additions & 0 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,12 @@ def __resolve_dtype__(cls, dtype, data):
# TODO: These values exist, but I haven't solved them yet
# binary
# number

# Use text dtype for Zarr datasets of strings. Zarr stores variable length strings

Check failure on line 927 in src/hdmf/backends/hdf5/h5tools.py

View workflow job for this annotation

GitHub Actions / Check for spelling errors

lenght ==> length
# as objects so we need to detect this special case here
if hasattr(data, 'attrs') and 'zarr_dtype' in data.attrs and data.attrs['zarr_dtype'] == 'str':
return cls.__dtypes['text']

Check warning on line 930 in src/hdmf/backends/hdf5/h5tools.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/backends/hdf5/h5tools.py#L930

Added line #L930 was not covered by tests

dtype = cls.__resolve_dtype_helper__(dtype)
if dtype is None:
dtype = cls.get_type(data)
Expand Down
12 changes: 10 additions & 2 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,18 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901
if (isinstance(value, np.ndarray) or
(hasattr(value, 'astype') and hasattr(value, 'dtype'))):
if spec_dtype_type is _unicode:
ret = value.astype('U')
if hasattr(value, 'attrs') and 'zarr_dtype' in value.attrs:
# Zarr stores strings as objects so we cannot convert to unicode dtype
ret = value

Check warning on line 214 in src/hdmf/build/objectmapper.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/build/objectmapper.py#L214

Added line #L214 was not covered by tests
else:
ret = value.astype('U')
ret_dtype = "utf8"
elif spec_dtype_type is _ascii:
ret = value.astype('S')
if hasattr(value, 'attrs') and 'zarr_dtype' in value.attrs:
# Zarr stores strings as objects so we cannot convert to ascii (bytes) dtype
ret = value

Check warning on line 221 in src/hdmf/build/objectmapper.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/build/objectmapper.py#L221

Added line #L221 was not covered by tests
else:
ret = value.astype('S')
ret_dtype = "ascii"
else:
dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
Expand Down

0 comments on commit db51abd

Please sign in to comment.