Skip to content

Commit

Permalink
i think I cracked it boys
Browse files Browse the repository at this point in the history
  • Loading branch information
mavaylon1 committed Sep 4, 2024
1 parent 6b2319a commit 78ae3f1
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 233 deletions.
26 changes: 18 additions & 8 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ def write_group(self, **kwargs):

subgroups = builder.groups
if subgroups:
# breakpoint()
for subgroup_name, sub_builder in subgroups.items():
self.write_group(
parent=group,
Expand Down Expand Up @@ -603,6 +604,7 @@ def write_attributes(self, **kwargs):
if isinstance(value, Builder):
refs = self._create_ref(value, export_source)
else:
# breakpoint()
refs = self._create_ref(value.builder, export_source)
tmp = {'zarr_dtype': type_str, 'value': refs}
obj.attrs[key] = tmp
Expand Down Expand Up @@ -714,7 +716,6 @@ def resolve_ref(self, zarr_ref):
2) the target zarr object within the target file
"""
# Extract the path as defined in the zarr_ref object
# breakpoint()
if zarr_ref.get('source', None) is None:
source_file = str(zarr_ref['path'])
else:
Expand Down Expand Up @@ -743,6 +744,7 @@ def resolve_ref(self, zarr_ref):
except Exception:
raise ValueError("Found bad link to object %s in file %s" % (object_path, source_file))
# Return the create path
# breakpoint()
return target_name, target_zarr_obj

def _create_ref(self, ref_object, export_source=None):
Expand All @@ -764,7 +766,7 @@ def _create_ref(self, ref_object, export_source=None):
builder = ref_object.builder
else:
builder = self.manager.build(ref_object)

breakpoint()
path = self.__get_path(builder)
# TODO Add to get region for region references.
# Also add {'name': 'region', 'type': (slice, list, tuple),
Expand Down Expand Up @@ -825,6 +827,7 @@ def __add_link__(self, parent, target_source, target_path, link_name):
:param link_name: Name of the link
:type link_name: str
"""
breakpoint()
if 'zarr_link' not in parent.attrs:
parent.attrs['zarr_link'] = []
zarr_link = list(parent.attrs['zarr_link'])
Expand Down Expand Up @@ -986,21 +989,26 @@ def write_dataset(self, **kwargs): # noqa: C901
dset = None
else: # exporting
parent_filename = parent.store.path
parent_name = parent.name.split('/')[-1] # The parent is a zarr object whose name is a relative path.
###############
parent_name = ''.join(char for char in parent.name if char.isalpha()) # zarr parent name has '/' ###############
data_parent = '/'.join(data.name.split('/')[:-1])

# Case 1: The dataset is NOT in the export source, create a link to preserve the external link.
# I have three files, FileA, FileB, FileC. I want to export FileA to FileB. FileA has an
# EXTERNAL link to a dataset in FileC. This case preserves the link to FileC to also be in FileB.

if data_filename != export_source:
self.__add_link__(parent, data_filename, data.name, name)
linked = True
dset = None
# Case 2: The dataset is in the export source and has a DIFFERENT path than the builder, create a link.
# I have three files, FileA, FileB, FileC. I want to export FileA to FileB. FileA has an
# I have two files: FileA and FileB. I want to export FileA to FileB. FileA has an
# INTERNAL link. This case preserves the link to also be in FileB.

# In HDMF-Zarr, external links and internal links are the same mechanism.
###############
# breakpoint()
if data_filename != export_source or builder.parent.name != parent_name:
self.__add_link__(parent, data_filename, data.name, name)
elif parent.name != data_parent:
# breakpoint()
self.__add_link__(parent, os.path.abspath(self.path), data.name, name)
linked = True
dset = None

Expand Down Expand Up @@ -1464,6 +1472,7 @@ def __read_links(self, zarr_obj, parent):
builder = self.__read_group(target_zarr_obj, target_name)
else:
builder = self.__read_dataset(target_zarr_obj, target_name)
# breakpoint()
link_builder = LinkBuilder(builder=builder, name=link_name, source=self.source)
link_builder.location = os.path.join(parent.location, parent.name)
self._written_builders.set_written(link_builder) # record that the builder has been written
Expand Down Expand Up @@ -1520,6 +1529,7 @@ def __read_dataset(self, zarr_obj, name):
kwargs['data'] = data
if name is None:
name = str(os.path.basename(zarr_obj.name))
# breakpoint()
ret = DatasetBuilder(name, **kwargs) # create builder object for dataset
ret.location = ZarrIO.get_zarr_parent_path(zarr_obj)
self._written_builders.set_written(ret) # record that the builder has been written
Expand Down
Binary file added test_io_hdf5.h5
Binary file not shown.
Binary file added test_io_hdf5_2.h5
Binary file not shown.
24 changes: 13 additions & 11 deletions tests/unit/base_tests_zarrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ def test_soft_link_group(self):
self.assertEqual(zarr_linkspec2.pop('source'), ".")
self.assertDictEqual(zarr_linkspec1, zarr_linkspec2)

# def test_soft_link_dataset(self):
# def test_soft_link_dataset(self): # This doesn't even export links???
# """Test that exporting a written file with soft linked datasets keeps links within the file."""
# """Link to a dataset in the same file should have a link to the same new dataset in the new file """
# # """
Expand All @@ -1151,10 +1151,12 @@ def test_soft_link_group(self):
# export_io.export(src_io=read_io, write_args=dict(link_data=False))
# with ZarrIO(self.store_path[1], manager=get_foo_buildmanager(), mode='r') as read_io:
# read_foofile2 = read_io.read()
# breakpoint()
# # make sure the linked dataset is within the same file
# print(open(self.store_path[1]+"/buckets/bucket1/foo_holder/foo1/.zattrs", 'r').read())
# self.assertEqual(read_foofile2.foofile_data.path, self.store_path[1])
# # """
# # self.assertEqual(read_foofile2.foofile_data.path, self.store_path[1])
# # self.assertTupleEqual(ZarrIO.get_zarr_paths(read_foofile2.foo_ref_attr.my_data),
# # (self.store_path[1], '/buckets/bucket1/foo_holder/foo1/my_data'))
# # # """

def test_external_link_group(self):
"""Test that exporting a written file with external linked groups maintains the links."""
Expand Down Expand Up @@ -1267,15 +1269,15 @@ def test_attr_reference(self):
export_io.export(src_io=read_io)
with ZarrIO(self.store_path[1], manager=get_foo_buildmanager(), mode='r') as read_io:
read_foofile2 = read_io.read()
# # breakpoint()
# breakpoint()
self.assertTupleEqual(ZarrIO.get_zarr_paths(read_foofile2.foo_ref_attr.my_data),
(self.store_path[1], '/buckets/bucket1/foo_holder/foo1/my_data'))
# make sure the attribute reference resolves to the container within the same file
# self.assertIs(read_foofile2.foo_ref_attr, read_foofile2.buckets['bucket1'].foos['foo1'])
# expected_ref = {'value': {'path': '/buckets/bucket1/foo_holder/foo1', 'source': self.source_paths[1]},
# 'zarr_dtype': 'object'}
# real_ref = zarr.open(self.store_path[1]).attrs['foo_ref_attr']
# self.assertDictEqual(real_ref, expected_ref)
# make sure the attribute reference resolves to the container within the same file
self.assertIs(read_foofile2.foo_ref_attr, read_foofile2.buckets['bucket1'].foos['foo1'])
expected_ref = {'value': {'path': '/buckets/bucket1/foo_holder/foo1', 'source': self.store_path[1]},
'zarr_dtype': 'object'}
real_ref = zarr.open(self.store_path[1]).attrs['foo_ref_attr']
self.assertDictEqual(real_ref, expected_ref)


def test_pop_data(self):
Expand Down
Loading

0 comments on commit 78ae3f1

Please sign in to comment.