From eb9cb1f9a71856fa37a00065aeb06007d6b77081 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Fri, 15 Sep 2023 21:01:55 -0700
Subject: [PATCH] Add roundtrip export tests for datasets of object references

Cover exporting containers that hold datasets of object references
(Baz/BazData) across HDF5 -> Zarr, Zarr -> HDF5, and Zarr -> Zarr. A new
REFERENCES flag on MixinTestCaseConvert verifies that exported references
resolve to the exported containers. BazBucket now has a default root name
so that roundtripped buckets compare equal, and the conversion gallery is
extended to convert HDF5 -> Zarr -> HDF5 and read the result back.
---
 docs/gallery/read.py          |  27 +++-
 src/hdmf_zarr/backend.py      |  14 +-
 tests/unit/test_io_convert.py | 279 ++++++++++++++++++++++------------
 tests/unit/utils.py           |   3 +-
 4 files changed, 220 insertions(+), 103 deletions(-)

diff --git a/docs/gallery/read.py b/docs/gallery/read.py
index fa982a89..d4f98c31 100644
--- a/docs/gallery/read.py
+++ b/docs/gallery/read.py
@@ -30,15 +30,32 @@
 from pynwb import NWBHDF5IO
 from hdmf_zarr.nwb import NWBZarrIO
 
-with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:
-    file = read_io.read()
-
-# with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:  # Create HDF5 IO object for read
-with NWBZarrIO(zarr_filename, mode='w') as export_io:  # Create Zarr IO object for write
-    export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from HDF5 to Zarr
+with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:  # Create HDF5 IO object for read
+    with NWBZarrIO(zarr_filename, mode='w') as export_io:  # Create Zarr IO object for write
+        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from HDF5 to Zarr
 
-with NWBZarrIO(path=zarr_filename, mode="r") as io:
-    infile = io.read()
-
-group = infile.electrodes.group.data[0]
-breakpoint()
+###############################################################################
+# Convert the Zarr file back to HDF5
+# ----------------------------------
+#
+with NWBZarrIO(zarr_filename, mode='r') as read_io:  # Create Zarr IO object for read
+    with NWBHDF5IO(hdf_filename, 'w') as export_io:  # Create HDF5 IO object for write
+        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from Zarr to HDF5
+
+###############################################################################
+# Read the new HDF5 file back
+# ---------------------------
+#
+# Now our file has been converted from HDF5 to Zarr and back again to HDF5.
+# Here we check that we can still read that file.
+
+with NWBHDF5IO(hdf_filename, 'r') as read_io:
+    hf = read_io.read()
+    group = hf.electrodes.group.data[0]  # resolve a group reference from the electrodes table
diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py
index c54dd51f..0f4915a2 100644
--- a/src/hdmf_zarr/backend.py
+++ b/src/hdmf_zarr/backend.py
@@ -1086,8 +1092,8 @@ def __read_group(self, zarr_obj, name=None):
             ret.set_group(sub_builder)
-        # breakpoint()
         # read sub datasets
         for sub_name, sub_array in zarr_obj.arrays():
-            # breakpoint()
             sub_builder = self.__read_dataset(sub_array, sub_name)
             ret.set_dataset(sub_builder)
@@ -1125,6 +1131,7 @@ def __read_links(self, zarr_obj, parent):
     def __read_dataset(self, zarr_obj, name):
-        # breakpoint()
         ret = self.__get_built(zarr_obj)
         if ret is not None:
             return ret
@@ -1156,7 +1163,6 @@ def __read_dataset(self, zarr_obj, name):
         reg_refs = False
         has_reference = False
         if isinstance(dtype, list):
-            breakpoint()
             # compound data type
             obj_refs = list()
             reg_refs = list()
diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py
index b60741db..33ae724c 100644
--- a/tests/unit/test_io_convert.py
+++ b/tests/unit/test_io_convert.py
@@ -105,8 +105,13 @@ class MixinTestCaseConvert(metaclass=ABCMeta):
     (Default=[None, ])
     """
 
+    REFERENCES = False
+    """
+    Whether the container under test holds datasets of object references whose
+    targets should be verified after the export roundtrip. (Default=False)
+ """ + def get_manager(self): - raise NotImplementedError('Cannot run test unless get_manger is implemented') + raise NotImplementedError('Cannot run test unless get_manger is implemented') def setUp(self): self.__manager = self.get_manager() @@ -160,11 +165,21 @@ def test_export_roundtrip(self): self.filenames.append(write_path if isinstance(write_path, str) else write_path.path) self.filenames.append(export_path if isinstance(export_path, str) else export_path.path) # roundtrip the container + # breakpoint() exported_container = self.roundtripExportContainer( container=container, write_path=write_path, export_path=export_path) - breakpoint() + # breakpoint() + if self.REFERENCES: + num_bazs = 10 + for i in range(num_bazs): + baz_name = 'baz%d' % i + # self.assertEqual(exported_container.baz_cpd_data.data[i][0], i) + # self.assertIs(exported_container.baz_cpd_data.data[i][1], exported_container.bazs[baz_name]) + self.assertEqual(exported_container.baz_data.data.__class__.__name__, 'ContainerH5ReferenceDataset') + self.assertIs(exported_container.baz_data.data[i], exported_container.bazs[baz_name]) + # breakpoint() # assert that the roundtrip worked correctly message = "Using: write_path=%s, export_path=%s" % (str(write_path), str(export_path)) self.assertIsNotNone(str(container), message) # added as a test to make sure printing works @@ -205,15 +220,16 @@ def get_manager(self): def roundtripExportContainer(self, container, write_path, export_path): with HDF5IO(write_path, manager=self.get_manager(), mode='w') as write_io: write_io.write(container, cache_spec=True) - + # breakpoint() with HDF5IO(write_path, manager=self.get_manager(), mode='r') as read_io: with ZarrIO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') - self.ios.append(read_io) - exportContainer = read_io.read() - return exportContainer + breakpoint() + # self.ios.append(read_io) + # exportContainer = read_io.read() + # return exportContainer class MixinTestZarrToHDF5(): @@ -229,18 +245,19 @@ class MixinTestZarrToHDF5(): NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] EXPORT_PATHS = [None, ] - def get_manager(self): - return get_hdmfcommon_manager() + # def get_manager(self): + # return get_hdmfcommon_manager() def roundtripExportContainer(self, container, write_path, export_path): with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: - write_io.write(container, cache_spec=True) - + write_io.write(container) + # breakpoint() with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: with HDF5IO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = HDF5IO(export_path, manager=self.get_manager(), mode='r') + # breakpoint() self.ios.append(read_io) exportContainer = read_io.read() return exportContainer @@ -268,15 +285,16 @@ def get_manager(self): def roundtripExportContainer(self, container, write_path, export_path): with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: write_io.write(container, cache_spec=True) - + # breakpoint() with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: with ZarrIO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') - self.ios.append(read_io) - exportContainer = read_io.read() - return exportContainer + 
@@ -380,6 +398,123 @@ def setUpContainer(self):
         else:
             raise NotImplementedError("FOO_TYPE %i not implemented in test" % self.FOO_TYPE)
 
+########################################
+# HDMF Baz test container of references
+########################################
+class MixinTestBaz1():
+    """
+    Mixin for conversion tests of a BazBucket that contains a dataset of
+    references to Baz containers. Used together with MixinTestCaseConvert
+    and one of the conversion mixins (e.g., MixinTestZarrToHDF5).
+    """
+    def get_manager(self):
+        return get_baz_buildmanager()
+
+    def setUpContainer(self):
+        num_bazs = 10
+
+        # set up a dataset of references to Baz containers
+        bazs = []
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        baz_data = BazData(name='baz_data1', data=bazs)
+
+        # TODO: also cover compound datasets of references (BazCpdData)
+        bucket = BazBucket(bazs=bazs, baz_data=baz_data)
+        return bucket
+
 
 ########################################
 # Actual test cases for conversion
 ########################################
@@ -590,6 +725,42 @@ class TestHDF5toZarrFooCase2(MixinTestFoo,
     IGNORE_STRING_TO_BYTE = True
     FOO_TYPE = MixinTestFoo.FOO_TYPES['link_data']
 
+class TestZarrToHDF5Baz(MixinTestBaz1,
+                        MixinTestZarrToHDF5,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from Zarr to HDF5.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
+
+
+class TestHDF5toZarrBaz(MixinTestBaz1,
+                        MixinTestHDF5ToZarr,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from HDF5 to Zarr.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
+
+
+class TestZarrtoZarrBaz(MixinTestBaz1,
+                        MixinTestZarrToZarr,
+                        MixinTestCaseConvert,
+                        TestCase):
+    """
+    Test the conversion of a BazBucket with a dataset of Baz references from Zarr to Zarr.
+    See MixinTestBaz1.setUpContainer for the container spec used.
+    """
+    IGNORE_NAME = True
+    IGNORE_HDMF_ATTRS = True
+    IGNORE_STRING_TO_BYTE = True
+    REFERENCES = True
 
 # TODO: Fails because we need to copy the data from the ExternalLink as it points to a non-Zarr source
 """
@@ -668,83 +839,3 @@ def roundtripExportContainer(self):
     exportContainer = read_io.read()
     return exportContainer
 """
-from hdmf_zarr.backend import ZarrIO
-
-class Test_Export_Dataset_of_References(TestCase):
-    def test_export_dset_refs(self):
-        """Test that exporting a written container with a dataset of references works."""
-        self.path = [get_temp_filepath() for i in range(2)]
-        bazs = []
-        num_bazs = 10
-        for i in range(num_bazs):
-            bazs.append(Baz(name='baz%d' % i))
-        baz_data = BazData(name='baz_data1', data=bazs)
-        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
-        # breakpoint()
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io:
-            write_io.write(bucket)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket1 = read_io.read()
-            # NOTE: reference IDs might be the same between two identical files
-            # adding a Baz with a smaller name should change the reference IDs on export
-            new_baz = Baz(name='baz000')
-            read_bucket1.add_baz(new_baz)
-
-            with ZarrIO(self.path[1], mode='w') as export_io:
-                export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False))
-
-        with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket2 = read_io.read()
-
-            # remove and check the appended child, then compare the read container with the original
-            read_new_baz = read_bucket2.remove_baz('baz000')
-
-            self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True)
-
-            self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True)
-            # assert the builders were resolved
-            for i in range(num_bazs):
-                baz_name = 'baz%d' % i
-                self.assertEqual(read_bucket2.baz_data.data.__class__.__name__, 'ContainerZarrReferenceDataset')
-                self.assertIs(read_bucket2.baz_data.data[i], read_bucket2.bazs[baz_name])
-
-    def test_export_cpd_dset_refs(self):
-        self.path = [get_temp_filepath() for i in range(2)]
-        """Test that exporting a written container with a compound dataset with references works."""
-        bazs = []
-        baz_pairs = []
-        num_bazs = 10
-        for i in range(num_bazs):
-            b = Baz(name='baz%d' % i)
-            bazs.append(b)
-            baz_pairs.append((i, b))
-        baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=baz_pairs)
-        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io:
-            write_io.write(bucket)
-
-        with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket1 = read_io.read()
-
-            # NOTE: reference IDs might be the same between two identical files
-            # adding a Baz with a smaller name should change the reference IDs on export
-            new_baz = Baz(name='baz000')
-            read_bucket1.add_baz(new_baz)
-
-            with ZarrIO(self.path[1], mode='w') as export_io:
-                export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False))
-
-        with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io:
-            read_bucket2 = read_io.read()
-
-            # remove and check the appended child, then compare the read container with the original
-            read_new_baz = read_bucket2.remove_baz(new_baz.name)
-            self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True)
-
-            self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True)
-            for i in range(num_bazs):
-                baz_name = 'baz%d' % i
-                self.assertEqual(read_bucket2.baz_cpd_data.data[i][0], i)
-                self.assertIs(read_bucket2.baz_cpd_data.data[i][1], read_bucket2.bazs[baz_name])
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 64ccc4af..123536e0 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -306,8 +306,9 @@ class BazCpdData(Data):
 
 
 class BazBucket(Container):
+    ROOT_NAME = 'root'
 
-    @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'},
+    @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket', 'default': 'root'},
             {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'},
             {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None},
             {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None})
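
Reviewer note: the dataset-of-references roundtrip that the new REFERENCES
flag checks can also be exercised standalone. Below is a minimal sketch, not
part of the patch, assuming hdmf and hdmf-zarr are installed and the Baz test
helpers in tests/unit/utils.py are importable; the file names 'test.h5' and
'test.zarr' are placeholders for temporary files:

    from hdmf.backends.hdf5 import HDF5IO
    from hdmf_zarr.backend import ZarrIO
    from tests.unit.utils import Baz, BazData, BazBucket, get_baz_buildmanager

    # build a bucket whose BazData dataset stores references to the Baz containers
    num_bazs = 3
    bazs = [Baz(name='baz%d' % i) for i in range(num_bazs)]
    baz_data = BazData(name='baz_data1', data=bazs)
    bucket = BazBucket(bazs=bazs, baz_data=baz_data)  # name defaults to 'root' with this patch

    # write to HDF5, then export to Zarr; link_data=False copies linked datasets
    with HDF5IO('test.h5', manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(bucket)
    with HDF5IO('test.h5', manager=get_baz_buildmanager(), mode='r') as read_io:
        with ZarrIO('test.zarr', mode='w') as export_io:
            export_io.export(src_io=read_io, write_args=dict(link_data=False))

    # read the export back and confirm every reference resolves to its Baz
    with ZarrIO('test.zarr', manager=get_baz_buildmanager(), mode='r') as read_io:
        bucket2 = read_io.read()
        for i in range(num_bazs):
            assert bucket2.baz_data.data[i] is bucket2.bazs['baz%d' % i]

The same pattern runs in the opposite direction by swapping HDF5IO and ZarrIO,
which is what the three new Baz test classes cover.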