Skip to content

Commit

Permalink
implement group/dataset id as a string
Browse files Browse the repository at this point in the history
  • Loading branch information
magland committed Apr 4, 2024
1 parent 249feda commit 015552b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 25 deletions.
6 changes: 6 additions & 0 deletions examples/example1.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@
with pynwb.NWBHDF5IO(file=client, mode="r") as io:
nwbfile = io.read()
print(nwbfile)

print('Electrode group at shank0:')
print(nwbfile.electrode_groups["shank0"]) # type: ignore

print('Electrode group at index 0:')
print(nwbfile.electrodes.group[0]) # type: ignore
14 changes: 5 additions & 9 deletions lindi/LindiH5pyFile/LindiH5pyDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@
from .LindiH5pyFile import LindiH5pyFile # pragma: no cover


class LindiH5pyDatasetId:
def __init__(self, _h5py_dataset_id):
self._h5py_dataset_id = _h5py_dataset_id


# This is a global list of external hdf5 clients, which are used by
# possibly multiple LindiH5pyFile objects. The key is the URL of the
# external hdf5 file, and the value is the h5py.File object.
Expand All @@ -32,6 +27,9 @@ def __init__(self, _dataset_object: Union[h5py.Dataset, zarr.Array], _file: "Lin
self._file = _file
self._readonly = _file.mode not in ['r+']

# see comment in LindiH5pyGroup
self._id = f'{id(self._file)}/{self._dataset_object.name}'

# See if we have the _COMPOUND_DTYPE attribute, which signifies that
# this is a compound dtype
if isinstance(_dataset_object, zarr.Array):
Expand Down Expand Up @@ -74,10 +72,8 @@ def __init__(self, _dataset_object: Union[h5py.Dataset, zarr.Array], _file: "Lin

@property
def id(self):
if isinstance(self._dataset_object, h5py.Dataset):
return LindiH5pyDatasetId(self._dataset_object.id)
else:
return LindiH5pyDatasetId(None)
# see comment in LindiH5pyGroup
return self._id

@property
def shape(self): # type: ignore
Expand Down
6 changes: 5 additions & 1 deletion lindi/LindiH5pyFile/LindiH5pyFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def __init__(self, _file_object: Union[h5py.File, zarr.Group], *, _zarr_store: U
self._mode: Literal['r', 'r+'] = _mode
self._the_group = LindiH5pyGroup(_file_object, self)

# see comment in LindiH5pyGroup
self._id = f'{id(self._file_object)}/'

@staticmethod
def from_reference_file_system(rfs: Union[dict, str], mode: Literal["r", "r+"] = "r"):
"""
Expand Down Expand Up @@ -298,7 +301,8 @@ def __contains__(self, name):

@property
def id(self):
return self._the_group.id
# see comment in LindiH5pyGroup
return self._id

@property
def file(self):
Expand Down
28 changes: 13 additions & 15 deletions lindi/LindiH5pyFile/LindiH5pyGroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,23 @@
from .LindiH5pyFile import LindiH5pyFile # pragma: no cover


class LindiH5pyGroupId:
def __init__(self, _h5py_group_id):
self._h5py_group_id = _h5py_group_id


class LindiH5pyGroup(h5py.Group):
def __init__(self, _group_object: Union[h5py.Group, zarr.Group], _file: "LindiH5pyFile"):
self._group_object = _group_object
self._file = _file
self._readonly = _file.mode not in ['r+']

# In h5py, the id property is an object that exposes low-level
# operations specific to the HDF5 library. LINDI aims to override the
# high-level methods so that the low-level operations on id are not
# needed. However, some packages (e.g., pynwb) use the id as a unique
# identifier for caching purposes. Therefore, we make the id a string
# that identifies each object: it is built from the Python id() of the
# owning file object and the object's name within that file. If any
# low-level operation is attempted on this id string, an exception
# will be raised, which usually indicates that one of the high-level
# methods should be overridden.
self._id = f'{id(self._file)}/{self._group_object.name}'

# The self._write object handles all the writing operations
from .writers.LindiH5pyGroupWriter import LindiH5pyGroupWriter # avoid circular import
if self._readonly:
Expand Down Expand Up @@ -131,16 +137,8 @@ def __repr__(self):

@property
def id(self):
if isinstance(self._group_object, h5py.Group):
return LindiH5pyGroupId(self._group_object.id)
elif isinstance(self._group_object, zarr.Group):
# This is commented out for now because pynwb gets the id of a group
# in at least one place. But that could be avoided in the future, at
# which time, we could uncomment this.
# print('WARNING: Accessing low-level id of LindiH5pyGroup. This should be avoided.')
return LindiH5pyGroupId('')
else:
raise Exception(f'Unexpected group object type: {type(self._group_object)}')
# see comment above
return self._id

@property
def file(self):
Expand Down

0 comments on commit 015552b

Please sign in to comment.