From eb9cb1f9a71856fa37a00065aeb06007d6b77081 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Fri, 15 Sep 2023 21:01:55 -0700
Subject: [PATCH] Add roundtrip export tests for datasets of object references

Cover exporting containers that hold datasets of object references
(Baz/BazData) across HDF5 -> Zarr, Zarr -> HDF5, and Zarr -> Zarr. A new
REFERENCES flag on MixinTestCaseConvert verifies that exported references
resolve to the exported containers. BazBucket now has a default root name
so that roundtripped buckets compare equal, and the conversion gallery is
extended to convert HDF5 -> Zarr -> HDF5 and read the result back.
---
 docs/gallery/read.py          |  27 +++-
 src/hdmf_zarr/backend.py      |  14 +-
 tests/unit/test_io_convert.py | 279 ++++++++++++++++++++++------------
 tests/unit/utils.py           |   3 +-
 4 files changed, 220 insertions(+), 103 deletions(-)

diff --git a/docs/gallery/read.py b/docs/gallery/read.py
index fa982a89..d4f98c31 100644
--- a/docs/gallery/read.py
+++ b/docs/gallery/read.py
@@ -30,15 +30,32 @@
 from pynwb import NWBHDF5IO
 from hdmf_zarr.nwb import NWBZarrIO
 
-with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:
-    file = read_io.read()
-
-# with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:  # Create HDF5 IO object for read
-with NWBZarrIO(zarr_filename, mode='w') as export_io:  # Create Zarr IO object for write
-    export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from HDF5 to Zarr
+with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:  # Create HDF5 IO object for read
+    with NWBZarrIO(zarr_filename, mode='w') as export_io:  # Create Zarr IO object for write
+        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from HDF5 to Zarr
 
-with NWBZarrIO(path=zarr_filename, mode="r") as io:
-    infile = io.read()
-
-group = infile.electrodes.group.data[0]
-breakpoint()
+###############################################################################
+# Convert the Zarr file back to HDF5
+# ----------------------------------
+#
+with NWBZarrIO(zarr_filename, mode='r') as read_io:  # Create Zarr IO object for read
+    with NWBHDF5IO(hdf_filename, 'w') as export_io:  # Create HDF5 IO object for write
+        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from Zarr to HDF5
+
+###############################################################################
+# Read the new HDF5 file back
+# ---------------------------
+#
+# Now our file has been converted from HDF5 to Zarr and back again to HDF5.
+# Here we check that we can still read that file.
+
+with NWBHDF5IO(hdf_filename, 'r') as read_io:
+    hf = read_io.read()
+    group = hf.electrodes.group.data[0]  # resolve a group reference from the electrodes table
diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py
index c54dd51f..0f4915a2 100644
--- a/src/hdmf_zarr/backend.py
+++ b/src/hdmf_zarr/backend.py
@@ -1086,8 +1092,8 @@ def __read_group(self, zarr_obj, name=None):
             ret.set_group(sub_builder)
-        # breakpoint()
         # read sub datasets
         for sub_name, sub_array in zarr_obj.arrays():
-            # breakpoint()
             sub_builder = self.__read_dataset(sub_array, sub_name)
             ret.set_dataset(sub_builder)
@@ -1125,6 +1131,7 @@ def __read_links(self, zarr_obj, parent):
     def __read_dataset(self, zarr_obj, name):
-        # breakpoint()
         ret = self.__get_built(zarr_obj)
         if ret is not None:
             return ret
@@ -1156,7 +1163,6 @@ def __read_dataset(self, zarr_obj, name):
         reg_refs = False
         has_reference = False
         if isinstance(dtype, list):
-            breakpoint()
             # compound data type
             obj_refs = list()
             reg_refs = list()
diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py
index b60741db..33ae724c 100644
--- a/tests/unit/test_io_convert.py
+++ b/tests/unit/test_io_convert.py
@@ -105,8 +105,13 @@ class MixinTestCaseConvert(metaclass=ABCMeta):
     (Default=[None, ])
     """
 
+    REFERENCES = False
+    """
+    Whether the container under test holds datasets of object references whose
+    targets should be verified after the export roundtrip. (Default=False)
+ """ + def get_manager(self): - raise NotImplementedError('Cannot run test unless get_manger is implemented') + raise NotImplementedError('Cannot run test unless get_manger is implemented') def setUp(self): self.__manager = self.get_manager() @@ -160,11 +165,21 @@ def test_export_roundtrip(self): self.filenames.append(write_path if isinstance(write_path, str) else write_path.path) self.filenames.append(export_path if isinstance(export_path, str) else export_path.path) # roundtrip the container + # breakpoint() exported_container = self.roundtripExportContainer( container=container, write_path=write_path, export_path=export_path) - breakpoint() + # breakpoint() + if self.REFERENCES: + num_bazs = 10 + for i in range(num_bazs): + baz_name = 'baz%d' % i + # self.assertEqual(exported_container.baz_cpd_data.data[i][0], i) + # self.assertIs(exported_container.baz_cpd_data.data[i][1], exported_container.bazs[baz_name]) + self.assertEqual(exported_container.baz_data.data.__class__.__name__, 'ContainerH5ReferenceDataset') + self.assertIs(exported_container.baz_data.data[i], exported_container.bazs[baz_name]) + # breakpoint() # assert that the roundtrip worked correctly message = "Using: write_path=%s, export_path=%s" % (str(write_path), str(export_path)) self.assertIsNotNone(str(container), message) # added as a test to make sure printing works @@ -205,15 +220,16 @@ def get_manager(self): def roundtripExportContainer(self, container, write_path, export_path): with HDF5IO(write_path, manager=self.get_manager(), mode='w') as write_io: write_io.write(container, cache_spec=True) - + # breakpoint() with HDF5IO(write_path, manager=self.get_manager(), mode='r') as read_io: with ZarrIO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') - self.ios.append(read_io) - exportContainer = read_io.read() - return exportContainer + breakpoint() + # self.ios.append(read_io) + # exportContainer = read_io.read() + # return exportContainer class MixinTestZarrToHDF5(): @@ -229,18 +245,19 @@ class MixinTestZarrToHDF5(): NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] EXPORT_PATHS = [None, ] - def get_manager(self): - return get_hdmfcommon_manager() + # def get_manager(self): + # return get_hdmfcommon_manager() def roundtripExportContainer(self, container, write_path, export_path): with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: - write_io.write(container, cache_spec=True) - + write_io.write(container) + # breakpoint() with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: with HDF5IO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = HDF5IO(export_path, manager=self.get_manager(), mode='r') + # breakpoint() self.ios.append(read_io) exportContainer = read_io.read() return exportContainer @@ -268,15 +285,16 @@ def get_manager(self): def roundtripExportContainer(self, container, write_path, export_path): with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: write_io.write(container, cache_spec=True) - + # breakpoint() with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: with ZarrIO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') - self.ios.append(read_io) - exportContainer = read_io.read() - return exportContainer + 
@@ -380,6 +398,123 @@ def setUpContainer(self):
         else:
             raise NotImplementedError("FOO_TYPE %i not implemented in test" % self.FOO_TYPE)
 
+########################################
+# HDMF Baz test container of references
+########################################
+class MixinTestBaz1():
+    """
+    Mixin for conversion tests of a BazBucket that contains a dataset of
+    references to Baz containers. Used together with MixinTestCaseConvert
+    and one of the conversion mixins (e.g., MixinTestZarrToHDF5).
+    """
+    def get_manager(self):
+        return get_baz_buildmanager()
+
+    def setUpContainer(self):
+        num_bazs = 10
+
+        # set up a dataset of references to Baz containers
+        bazs = []
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        baz_data = BazData(name='baz_data1', data=bazs)
+
+        # TODO: also cover compound datasets of references (BazCpdData)
+        bucket = BazBucket(bazs=bazs, baz_data=baz_data)
+        return bucket
+
 
 ########################################
 # Actual test cases for conversion
 ########################################
@@ -590,6 +725,42 @@ class TestHDF5toZarrFooCase2(MixinTestFoo,
     IGNORE_STRING_TO_BYTE = True
     FOO_TYPE = MixinTestFoo.FOO_TYPES['link_data']
 
+class TestZarrToHDF5Baz(MixinTestBaz1,
+                        MixinTestZarrToHDF5,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from Zarr to HDF5.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
+
+
+class TestHDF5toZarrBaz(MixinTestBaz1,
+                        MixinTestHDF5ToZarr,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from HDF5 to Zarr.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
+
+
+class TestZarrtoZarrBaz(MixinTestBaz1,
+                        MixinTestZarrToZarr,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from Zarr to Zarr.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
 
 # TODO: Fails because we need to copy the data from the ExternalLink as it points to a non-Zarr source
 """
@@ -668,83 +839,3 @@ def roundtripExportContainer(self):
     exportContainer = read_io.read()
     return exportContainer
 """
-from hdmf_zarr.backend import ZarrIO
-
-class Test_Export_Dataset_of_References(TestCase):
-    def test_export_dset_refs(self):
-        """Test that exporting a written container with a dataset of references works."""
-        self.path = [get_temp_filepath() for i in range(2)]
-        bazs = []
-        num_bazs = 10
-        for i in range(num_bazs):
-            bazs.append(Baz(name='baz%d' % i))
-        baz_data = BazData(name='baz_data1', data=bazs)
-        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
-        # breakpoint()
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io:
-            write_io.write(bucket)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket1 = read_io.read()
-            # NOTE: reference IDs might be the same between two identical files
-            # adding a Baz with a smaller name should change the reference IDs on export
-            new_baz = Baz(name='baz000')
-            read_bucket1.add_baz(new_baz)
-
-            with ZarrIO(self.path[1], mode='w') as export_io:
-                export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False))
-
-        with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket2 = read_io.read()
-
-            # remove and check the appended child, then compare the read container with the original
-            read_new_baz = read_bucket2.remove_baz('baz000')
-
-            self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True)
-
-            self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True)
-            # assert the builders were resolved
-            for i in range(num_bazs):
-                baz_name = 'baz%d' % i
-                self.assertEqual(read_bucket2.baz_data.data.__class__.__name__, 'ContainerZarrReferenceDataset')
-                self.assertIs(read_bucket2.baz_data.data[i], read_bucket2.bazs[baz_name])
-
-    def test_export_cpd_dset_refs(self):
-        self.path = [get_temp_filepath() for i in range(2)]
-        """Test that exporting a written container with a compound dataset with references works."""
-        bazs = []
-        baz_pairs = []
-        num_bazs = 10
-        for i in range(num_bazs):
-            b = Baz(name='baz%d' % i)
-            bazs.append(b)
-            baz_pairs.append((i, b))
-        baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=baz_pairs)
-        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io:
-            write_io.write(bucket)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket1 = read_io.read()
-
-            # NOTE: reference IDs might be the same between two identical files
-            # adding a Baz with a smaller name should change the reference IDs on export
-            new_baz = Baz(name='baz000')
-            read_bucket1.add_baz(new_baz)
-
-            with ZarrIO(self.path[1], mode='w') as export_io:
-                export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False))
-
-        with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket2 = read_io.read()
-
-            # remove and check the appended child, then compare the read container with the original
-            read_new_baz = read_bucket2.remove_baz(new_baz.name)
-            self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True)
-
-            self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True)
-            for i in range(num_bazs):
-                baz_name = 'baz%d' % i
-                self.assertEqual(read_bucket2.baz_cpd_data.data[i][0], i)
-                self.assertIs(read_bucket2.baz_cpd_data.data[i][1], read_bucket2.bazs[baz_name])
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 64ccc4af..123536e0 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -306,8 +306,9 @@ class BazCpdData(Data):
 
 
 class BazBucket(Container):
+    ROOT_NAME = 'root'
 
-    @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'},
+    @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket', 'default': 'root'},
             {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'},
             {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None},
             {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None})
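
Reviewer note: the dataset-of-references roundtrip that the new REFERENCES
flag checks can also be exercised standalone. Below is a minimal sketch, not
part of the patch, assuming hdmf and hdmf-zarr are installed and the Baz test
helpers in tests/unit/utils.py are importable; the file names 'test.h5' and
'test.zarr' are placeholders for temporary files:

    from hdmf.backends.hdf5 import HDF5IO
    from hdmf_zarr.backend import ZarrIO
    from tests.unit.utils import Baz, BazData, BazBucket, get_baz_buildmanager

    # build a bucket whose BazData dataset stores references to the Baz containers
    num_bazs = 3
    bazs = [Baz(name='baz%d' % i) for i in range(num_bazs)]
    baz_data = BazData(name='baz_data1', data=bazs)
    bucket = BazBucket(bazs=bazs, baz_data=baz_data)  # name defaults to 'root' with this patch

    # write to HDF5, then export to Zarr; link_data=False copies linked datasets
    with HDF5IO('test.h5', manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(bucket)
    with HDF5IO('test.h5', manager=get_baz_buildmanager(), mode='r') as read_io:
        with ZarrIO('test.zarr', mode='w') as export_io:
            export_io.export(src_io=read_io, write_args=dict(link_data=False))

    # read the export back and confirm every reference resolves to its Baz
    with ZarrIO('test.zarr', manager=get_baz_buildmanager(), mode='r') as read_io:
        bucket2 = read_io.read()
        for i in range(num_bazs):
            assert bucket2.baz_data.data[i] is bucket2.bazs['baz%d' % i]

The same pattern runs in the opposite direction by swapping HDF5IO and ZarrIO,
which is what the three new Baz test classes cover.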