From 78ae3f1f369c95000eed5ac5ab845894fd838af3 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 4 Sep 2024 14:49:20 -0700 Subject: [PATCH] i think I cracked it boys --- src/hdmf_zarr/backend.py | 26 +- test_io_hdf5.h5 | Bin 0 -> 17040 bytes test_io_hdf5_2.h5 | Bin 0 -> 12688 bytes tests/unit/base_tests_zarrio.py | 24 +- tests/unit/test_zarrio.py | 428 ++++++++++++++++---------------- 5 files changed, 245 insertions(+), 233 deletions(-) create mode 100644 test_io_hdf5.h5 create mode 100644 test_io_hdf5_2.h5 diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index c603dd0..a2abff7 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -527,6 +527,7 @@ def write_group(self, **kwargs): subgroups = builder.groups if subgroups: + # breakpoint() for subgroup_name, sub_builder in subgroups.items(): self.write_group( parent=group, @@ -603,6 +604,7 @@ def write_attributes(self, **kwargs): if isinstance(value, Builder): refs = self._create_ref(value, export_source) else: + # breakpoint() refs = self._create_ref(value.builder, export_source) tmp = {'zarr_dtype': type_str, 'value': refs} obj.attrs[key] = tmp @@ -714,7 +716,6 @@ def resolve_ref(self, zarr_ref): 2) the target zarr object within the target file """ # Extract the path as defined in the zarr_ref object - # breakpoint() if zarr_ref.get('source', None) is None: source_file = str(zarr_ref['path']) else: @@ -743,6 +744,7 @@ def resolve_ref(self, zarr_ref): except Exception: raise ValueError("Found bad link to object %s in file %s" % (object_path, source_file)) # Return the create path + # breakpoint() return target_name, target_zarr_obj def _create_ref(self, ref_object, export_source=None): @@ -764,7 +766,7 @@ def _create_ref(self, ref_object, export_source=None): builder = ref_object.builder else: builder = self.manager.build(ref_object) - + breakpoint() path = self.__get_path(builder) # TODO Add to get region for region references. # Also add {'name': 'region', 'type': (slice, list, tuple), @@ -825,6 +827,7 @@ def __add_link__(self, parent, target_source, target_path, link_name): :param link_name: Name of the link :type link_name: str """ + breakpoint() if 'zarr_link' not in parent.attrs: parent.attrs['zarr_link'] = [] zarr_link = list(parent.attrs['zarr_link']) @@ -986,21 +989,26 @@ def write_dataset(self, **kwargs): # noqa: C901 dset = None else: # exporting parent_filename = parent.store.path - parent_name = parent.name.split('/')[-1] # The parent is a zarr object whose name is a relative path. - ############### + parent_name = ''.join(char for char in parent.name if char.isalpha()) # zarr parent name has '/' ############### + data_parent = '/'.join(data.name.split('/')[:-1]) + # Case 1: The dataset is NOT in the export source, create a link to preserve the external link. # I have three files, FileA, FileB, FileC. I want to export FileA to FileB. FileA has an # EXTERNAL link to a dataset in Filec. This case preserves the link to FileC to also be in FileB. - + if data_filename != export_source: + self.__add_link__(parent, data_filename, data.name, name) + linked = True + dset = None # Case 2: The dataset is in the export source and has a DIFFERENT path as the builder, create a link. - # I have three files, FileA, FileB, FileC. I want to export FileA to FileB. FileA has an + # I have two files: FileA and FileB. I want to export FileA to FileB. FileA has an # INTERNAL link. This case preserves the link to also be in FileB. # In HDMF-Zarr, external links and internal links are the same mechanism. ############### # breakpoint() - if data_filename != export_source or builder.parent.name != parent_name: - self.__add_link__(parent, data_filename, data.name, name) + elif parent.name != data_parent: + # breakpoint() + self.__add_link__(parent, os.path.abspath(self.path), data.name, name) linked = True dset = None @@ -1464,6 +1472,7 @@ def __read_links(self, zarr_obj, parent): builder = self.__read_group(target_zarr_obj, target_name) else: builder = self.__read_dataset(target_zarr_obj, target_name) + # breakpoint() link_builder = LinkBuilder(builder=builder, name=link_name, source=self.source) link_builder.location = os.path.join(parent.location, parent.name) self._written_builders.set_written(link_builder) # record that the builder has been written @@ -1520,6 +1529,7 @@ def __read_dataset(self, zarr_obj, name): kwargs['data'] = data if name is None: name = str(os.path.basename(zarr_obj.name)) + # breakpoint() ret = DatasetBuilder(name, **kwargs) # create builder object for dataset ret.location = ZarrIO.get_zarr_parent_path(zarr_obj) self._written_builders.set_written(ret) # record that the builder has been written diff --git a/test_io_hdf5.h5 b/test_io_hdf5.h5 new file mode 100644 index 0000000000000000000000000000000000000000..00f87f2b983c6e8c6ac8e97af8e31803c1ff4cac GIT binary patch literal 17040 zcmeHO&2JM&6kjI=0t7XvRQ-@zR&J4^3M1QbK1@~NP@t|vMp6(*Yf3Tc!)~a@DcfB~W8bRxB z1clL-R}1{0=EOnRf&!7#tTY%Ar=Uere$TS}mS?G9o#k?r{60NN^m8NnGu4xcfG)pc zW%pQqWY#3Vxt^SsrQ$!v-+g`aTFRQWMDA>sThRkuj%8d~n>rM_Tb$o!yDfJ3`A36v z-DS$#A5%Oj-;{N1zz)bhws@Xqc+h?9*%_vv6kyCe#zH{11ENO{d4CXQei&BjVZ-&> zR3ds<5`r;fvID7}9auau$ajPDTiQ6r>}KN0hn)v*ou;X8vwn{{4?I6h^kSDM9yl|> z0R(hAAp35Y*tnPD^f>9r%s+2f?9vzv4Qv^UCbN+MCpq*nvBof1r&+#$#f^>o~0m1D9e?S1LsX z^7BsOS@0iV%>pPq>3m5sItDo`pP89a4d!_KmW_$ z-#q^CWNF?i?Q4$?hWFsSiarSZqq7H{PUReeZb3O$VA;G?hA+~dQsZbjO)uJVYMv?` zj~||Kbbk0_rxM@aIw(J!u8a@UI=`^`qgRV7fqTI5;q`^^!!wtxhwdlU`#B5_c&5|& zjf+EaosPeN-(he-7x7U4rwx@5QP;%&pib-j4x`8T*k_m?>-V?%I>~)iiSYwgD#TtC zS88Ef?6s_0iR;IW#NO_4LwQfs_j{a#{cg^>gbzF#VNLkI0|~@m`tRRQNrG!65a}-&OTAhl=4<7zOFK>Z}y=IX{P8u4M|vypyS}=JJ`Glg}<^tKON!i>MpHhsw->GQqe1V zj#I@Jd_7{Jp9RBCdb1sNwxaaqA9m8H)DgTkq70r^(wEagD^6cbyI~E|PRsVTo#s}< zvzZpE(6m4udvu`yeRn~%15+VJdQ;%&ZF+6HN7rYf+ox*pt1;UH zZ=vj;gSc)xYFxHz4Q=GmRdGEZh6vytV0aOXzO^}Qbh*UnVsAT6?_LDp^eOs{(20k! zSW2)^glU)}a7nC0B1ydCx@W`iBRgo>aXpA^dRPOT?sc429K`owexD%Y-kgILE*Z`_ ziC@&I5_*2vRve)Xub|(%qaSE76mjH`0Nk#Ht=I`#2p*2&C96QHqo7O7G?Ao9-)wm{ z^w9(}j6Q+Lgtp(<4?`d9fx1i?1SZ+_x{XA3=#vcbLU}Gq6Y%T< z+n1i6#-O@c!@N z{TximBqK0NKAwC=vloBKBx7ObwjTlg9oAEJe2E`X_j~T=U`h~yVSe)Rr-Vkx%%_9l&gOMpV~_W literal 0 HcmV?d00001 diff --git a/test_io_hdf5_2.h5 b/test_io_hdf5_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..fd0389532a456dffb44b98462b16e25a93f74192 GIT binary patch literal 12688 zcmeHNQE%Kt5MC#ZxJE@P5)!2dw6YLHsEIC@K-0X?Kojl(krVKMgsQ90Ugu2j?5pi7 zjiQtC7~b;8Pv}qJC-f)q*hl^a%Tec4oXc_HXTb2XC+5 zTorgV8sdt$uDks6H(lqOhQjwe{(w3TwP(~mT{0a)w+GB_QWoBVjB++8<{bN7}TBtG=LSgI%9a5PYocwq=3fwAFn7imE7@;r@tV=%=C zs!j!o>PV%s;F}6TQVW+WEk3zFxz7shNd!o0o#o8gMgAJ)ed%evq@E3PK%Xx5yMYQp zMF8&t>SdsPd{n31W=XqTJ;~k4EdZ`+VmL^=yiUYcNrYx}!9aA9*4GqRBpwgKc zwTPpH65!HL;@pd31P`@%$u4}QPoWyRlBK1tG5Ro864n?(IH0O#R(2tqd4wDhhELndlH#s5*i<(jZy)6(+sdv?*5h2Td( z?cx-mrs~l+N>$+QhTb4kMR1?EF9HuHQtT(aUz88=h4DY(W8jeT*NJvoor%i|fzx4r zKydUJET8#(HT3j;X3049F;^jz-Wfs>?{(G%{>LiK;J*-XH8(aln!q-pHL+<^E-#U6 z*CJpMun1TLECLn*i-1MIB2bF}*BPEs{g3OI)jC6Q5GP6V>I_&P;5q}=465?XVY(Xy z7nFx}1(t`khPwJ*wVpZeJ=LGmD8Thrb7y}S1%YaPkL%>4N`Zy_2UbQ zT{t#JteD(Pz;R|}9GBq>FYMQy<5wUP$yY J|1Zy#{{Rf>n)m