diff --git a/zml2lido/lidoTool.py b/zml2lido/lidoTool.py index 94ba9f7..43b998a 100644 --- a/zml2lido/lidoTool.py +++ b/zml2lido/lidoTool.py @@ -146,11 +146,11 @@ def to_lvl2_single(self, *, src: Path) -> Path: self.lc = LinkChecker(src=src, chunks=self.chunks) out_fn = self._lvl2_path(src) if not out_fn.exists() or self.force: - self.lc.new_src(src=src) + self.lc.load_lvl1(src=src) # self.lc.relWorks_cache_single(fn=src) self.lc.rmUnpublishedRecords() # remove unpublished records (not on SMB-Digital) self.lc.fixRelatedWorks() - self.lc.save(out_fn) + self.lc.save(lvl2=out_fn) else: print(f" lvl2 already exists: {out_fn}") return out_fn @@ -161,7 +161,7 @@ def split_lido(self, *, src: Path) -> Path: if self.chunks: self.force = True # otherwise subsequent chunks are not written for chunkFn in self.loopChunks(src=src): - logging.debug(f"WARN: split_lido: XXXXX: {chunkFn}") + # logging.debug(f"WARN: split_lido: XXXXX: {chunkFn}") self.split_lido_single(src=chunkFn) else: self.split_lido_single(src=src) diff --git a/zml2lido/linkChecker.py b/zml2lido/linkChecker.py index a414c52..eef536e 100644 --- a/zml2lido/linkChecker.py +++ b/zml2lido/linkChecker.py @@ -7,14 +7,14 @@ This step produces lvl2 lido. 
USAGE: -lc = LinkChecker(src="path/to/file.lido.xml") - +lc = LinkChecker(src="path/to/file.lido.xml") # accepts path as string +lc.load_lvl1(src=path) lc.fixRelatedWorks() # removes dead links in relatedWorks, also adds ISIL lc.linkResource_online_http() # for all linkResources print online status lc.rmInternalLinks() # remove linkResource with internal links, not used atm lc.rmUnpublishedRecords() # removes objects without objectPublishedID -lc.save(out_fn="path/to/lido.lvl2.xml") +lc.save(lvl2="path/to/lido.lvl2.xml") """ @@ -138,6 +138,9 @@ def linkResource_online_http(self) -> None: else: print("\tsuccess") + def load_lvl1(self, *, src: Path) -> None: + self.data = etree.parse(str(src)) + def rmInternalLinks(self) -> None: """ Remove resourceSet whose linkResource point to internal links; @@ -156,9 +159,6 @@ def rmInternalLinks(self) -> None: resourceSet = link.getparent().getparent() resourceSet.getparent().remove(resourceSet) - def new_src(self, *, src: Path) -> None: - self.data = etree.parse(str(src)) - def rmUnpublishedRecords(self) -> None: """ Remove lido records which are not published on SMB Digital. @@ -177,16 +177,16 @@ def rmUnpublishedRecords(self) -> None: recordN.getparent().remove(recordN) logging.debug("rmUnpublishedRecords: done!") - def save(self, out_fn: str | Path) -> str: + def save(self, *, lvl2: Path) -> Path: """ During __init__ we loaded a LIDO file, with this function we write it back to the out file location as set during __init__. """ - logging.debug(f"Writing back to {out_fn}") + logging.debug(f"Writing back to {lvl2}") self.data.write( - str(out_fn), pretty_print=True, encoding="UTF-8", xml_declaration=True + str(lvl2), pretty_print=True, encoding="UTF-8", xml_declaration=True ) - return out_fn + return lvl2 # # PRIVATE @@ -197,7 +197,7 @@ def _del_relWork(self, *, ID_N: Any) -> None: delete a relWork from self.etree. 
ID is a lxml node """ - logging.debug(f" removing unpublic relWork {ID_N.text}") + # logging.debug(f" removing unpublic relWork {ID_N.text}") relWorkSet = ID_N.getparent().getparent().getparent() relWorkWrap = relWorkSet.getparent() relWorkWrap.remove(relWorkSet)