-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
committing working version before transition to new relWorksCache
- Loading branch information
Showing
7 changed files
with
111,430 additions
and
51 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
<application xmlns="http://www.zetcom.com/ria/ws/module"> | ||
<modules> | ||
<module name="Object" totalSize="1"> | ||
<moduleItem hasAttachments="true" id="2268694" uuid="2268694"> | ||
<systemField dataType="Timestamp" name="__lastModified"> | ||
<value>2024-02-20 12:44:39.884</value> | ||
<formattedValue language="de">20.02.2024 12:44</formattedValue> | ||
</systemField> | ||
<repeatableGroup name="ObjPublicationGrp" size="1"> | ||
<repeatableGroupItem id="56748382" uuid="a833945d-a46a-4e5c-9047-d3b76fe0f30f"> | ||
<vocabularyReference name="PublicationVoc" id="62649" instanceName="ObjPublicationVgr"> | ||
<vocabularyReferenceItem id="1810139" name="Ja"> | ||
<formattedValue language="de">Ja</formattedValue> | ||
</vocabularyReferenceItem> | ||
</vocabularyReference> | ||
<vocabularyReference name="TypeVoc" id="62650" instanceName="ObjPublicationTypeVgr"> | ||
<vocabularyReferenceItem id="2600647" name="Daten freigegeben für SMB-digital"> | ||
<formattedValue language="de">Daten freigegeben für SMB-digital</formattedValue> | ||
</vocabularyReferenceItem> | ||
</vocabularyReference> | ||
</repeatableGroupItem> | ||
</repeatableGroup> | ||
<moduleReference name="ObjOwnerRef" targetModule="Address" multiplicity="N:1" size="1"> | ||
<moduleReferenceItem moduleItemId="67678" uuid="67678"> | ||
<formattedValue language="de">Ethnologisches Museum, Staatliche Museen zu Berlin</formattedValue> | ||
</moduleReferenceItem> | ||
</moduleReference> | ||
</moduleItem> | ||
</module> | ||
</modules> | ||
</application> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from pathlib import Path | ||
from zml2lido.file import per_chunk, unzip | ||
|
||
|
||
def test_per_chunk():
    """
    Check that per_chunk yields two chunk files for the local sample query.

    The test depends on a machine-specific sample file and raises
    FileNotFoundError when that file is not present.
    """
    p = Path(r"C:\m3\zml2lido\sdata\GG\20240307\query516069-chunk1.lido.xml")
    if not p.exists():
        # The builtin is FileNotFoundError; "FileNotFound" does not exist and
        # would surface as a confusing NameError instead.
        raise FileNotFoundError(f"{p} not found!")
    assert 2 == len(list(per_chunk(p)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from pathlib import Path | ||
from zml2lido.relWorksCache import RelWorksCache | ||
|
||
|
||
def test_init():
    """Check maxSize without arguments and with an explicit maxSize keyword."""
    default_cache = RelWorksCache()
    assert default_cache.maxSize == 20_000

    custom_cache = RelWorksCache(maxSize=40_000)
    assert custom_cache.maxSize == 40_000
|
||
|
||
def test_add_relWork():
    """
    Exercise add_relWork together with save.

    The cache is expected to hold one entry after the first add (and save)
    and two entries after the second add.
    """
    cache_file = Path("relWorks_cache.xml")
    if cache_file.exists():
        # Remove a cache file left over from an earlier run.
        cache_file.unlink()

    relworks = RelWorksCache()
    relworks.add_relWork(mtype="Object", ID=2268694)
    relworks.save(path=cache_file)
    assert len(relworks.cache) == 1

    relworks.add_relWork(mtype="Object", ID=3486950)
    assert len(relworks.cache) == 2
|
||
|
||
def test_load_cache_file():
    """
    Save a cache with one entry, load it into a fresh instance and check
    that the fresh instance reports a length of one.
    """
    cache_file = Path("relWorks_cache.xml")
    if cache_file.exists():
        # Remove a cache file left over from an earlier run.
        cache_file.unlink()

    writer = RelWorksCache()
    writer.add_relWork(mtype="Object", ID=2268694)
    writer.save(path=cache_file)

    reader = RelWorksCache()
    reader.load_cache_file(path=cache_file)
    assert reader.length() == 1
|
||
|
||
def test_lido_to_ids():
    """
    Exercise _lido_to_ids and, along the way, add_from_lido_file.

    Expects 171 ids from the sample LIDO file before it is added to the
    cache, and none afterwards, with the cache length then being 171.
    """
    rw = RelWorksCache()
    sample_lido = Path("group416397-chunk1.lido.xml")

    ids_before = rw._lido_to_ids(path=sample_lido)
    assert len(ids_before) == 171

    rw.add_from_lido_file(path=sample_lido)

    ids_after = rw._lido_to_ids(path=sample_lido)
    assert len(ids_after) == 0
    assert rw.length() == 171
    print(f"{rw.length()}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
""" | ||
WORK IN PROGRESS - File helpers for zml2lido | ||
We're currently only zipping zml files, not lido files automatically | ||
unpacked_path = unzip(Path("group1234-chunk1.zip")) | ||
for chunk in per_chunk(chunk_path): | ||
do_something_with(chunk) | ||
""" | ||
from zipfile import ZipFile | ||
from pathlib import Path | ||
import re | ||
|
||
|
||
def per_chunk(path: Path):
    """
    Loop through chunks easily. Not yet used in production.

    Yields ``path`` and then every following chunk file for as long as the
    file exists on disk. Chunk files are expected to be named
    ``<head>-chunk<N>.lido.xml``; the successor of chunk N is chunk N+1 with
    the same head. If ``path`` does not exist, nothing is yielded.

    Raises:
        ValueError: after an existing file has been yielded whose name does
            not end in ``-chunk<N>`` (before the ``.lido.xml`` suffix), since
            no successor can be derived from it.
    """
    suffix = ".lido.xml"
    current = path
    while current.exists():
        yield current
        name = str(current)
        # Strip the suffix from the END only. Splitting on the first
        # occurrence would truncate the path whenever a parent directory
        # happens to contain ".lido.xml".
        stem = name[: -len(suffix)] if name.endswith(suffix) else name
        m = re.search(r"-chunk(\d+)$", stem)
        if m is None:
            raise ValueError(f"Not chunkable: {current}")
        next_no = int(m.group(1)) + 1
        # Everything before the chunk number is kept verbatim.
        current = Path(f"{stem[: m.start(1)]}{next_no}{suffix}")
|
||
|
||
def unzip(path: Path):
    """
    Extract one member from the zip archive at ``path`` and return its path.

    The member is expected to carry the archive's file name with the last
    suffix replaced by ``.xml``; it is extracted into the directory that
    contains the archive.
    """
    target_dir = path.parent
    member_name = Path(path.name).with_suffix(".xml")
    with ZipFile(path, "r") as archive:
        archive.extract(str(member_name), path=target_dir)
    return target_dir / member_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.