Skip to content

Commit

Permalink
fix relWorksCache: save cache file to disk
Browse files: browse the repository at this point in the history
  • Loading branch information
mokko committed Jun 17, 2024
1 parent b960a5c commit 4f3041e
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
4 changes: 2 additions & 2 deletions zml2lido/lidoTool.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,11 @@ def zml2lidoSingle(self, *, src: str | Path, xslt="zml2lido") -> Path:
"""
srcP = Path(src)
lidoFn = self.outdir.joinpath(srcP.stem + ".lido.xml")
print(f"zml2lidoSingle with {xsl[xslt]}") # with file '{lidoFn}'
# print(f"zml2lidoSingle with {xsl[xslt]}") # with file '{lidoFn}'

if self.force is True or not lidoFn.exists():
if srcP.suffix == ".zip": # unzipping temp file
print(" src is zipped")
print(f" src is zipped {srcP}")
parent_dir = srcP.parent
member = Path(srcP.name).with_suffix(".xml")
temp_fn = parent_dir / member
Expand Down
5 changes: 4 additions & 1 deletion zml2lido/linkChecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def fixRelatedWorks(self) -> None:
)

# for //relatedWork in the current LIDO document
for objectID_N in relatedWorksL:
for idx, objectID_N in enumerate(relatedWorksL):
# don't _log self._log(f"fixRelatedWorks checking {objectID_N.text}")

# assuming that source always exists
Expand Down Expand Up @@ -98,6 +98,9 @@ def fixRelatedWorks(self) -> None:
self.rwc.lookup_relWork(mtype=mtype, ID=id_int)
# at this point we can rely on item being in relWorks cache
self._rewrite_relWork(mtype=mtype, objectID_N=objectID_N)
if idx % 10:
print("Saving relWorks cache")
self.rwc.save()

def linkResource_online_http(self) -> None:
"""
Expand Down
17 changes: 16 additions & 1 deletion zml2lido/relWorksCache.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, *, maxSize: int = 20_000, cache_dir: Path) -> None:
self.cache = Module()
self.maxSize = maxSize
self.cache_path = cache_dir / "relWorks_cache.xml"
print(f"{self.cache_path=}")

user, pw, baseURL = get_credentials()
self.client = MpApi(baseURL=baseURL, user=user, pw=pw)
Expand All @@ -52,6 +53,12 @@ def lookup_from_lido_chunks(self, *, path: Path) -> None:
"""
for p in per_chunk(path=path):
self.lookup_from_lido_file(path=p)
print(f"Cache size: {len(self.cache)}")
self.save()
if len(self.cache) > self.maxSize:
print("Cache is big enough. Let's go {len(self.cache)}")
self.save()
break

def lookup_from_lido_file(self, *, path: Path) -> None:
"""
Expand All @@ -65,6 +72,9 @@ def lookup_from_lido_file(self, *, path: Path) -> None:
IDs = self._lido_to_ids(path=path)
for mtype, id_int in IDs:
self.lookup_relWork(mtype=mtype, ID=id_int)
if len(self.cache) > self.maxSize:
print("relWorksCache has reached maxSize")
break

def lookup_relWork(self, *, mtype: str, ID: int) -> None:
"""
Expand Down Expand Up @@ -139,7 +149,12 @@ def save(self) -> Path:
"""
path: Path = self.cache_path
print(f"Saving file cache {self.cache_path}")
self.cache.toFile(path=path)
try:
self.cache.toFile(path=path)
except KeyboardInterrupt:
print(
"Catching keyboard interrupt while saving of relWorksCache; try again..."
)
return path

#
Expand Down

0 comments on commit 4f3041e

Please sign in to comment.