From 43c600f904a2cbffe79339ff4208e64ff6640bdd Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Fri, 30 Aug 2024 12:46:31 +0200
Subject: [PATCH] tests: also w/ METS server + page-parallel and w/ METS caching

---
Reviewer notes (text placed after the "---" separator is commentary only and
is not part of the commit message or of the change itself):

* The "workspace" fixture factory in tests/conftest.py is now parametrized
  over CONFIGS and yields a dict of keyword arguments instead of a bare
  workspace: always 'workspace', plus 'mets_server_url' in the 'pageparallel'
  configurations. The tests unpack that dict into run_processor() with **
  and read the workspace back via ['workspace'] before calling save_mets().
* 'metscache' sets config.OCRD_METS_CACHING = True. 'pageparallel' sets
  config.OCRD_MAX_PARALLEL_PAGES = 4 and starts an OcrdMetsServer on
  'mets.sock' in a separate Process, which is terminated after the yield.
  config.reset_defaults() is the last statement of the factory.
* NOTE(review): the new conftest.py code uses config, OcrdMetsServer, Process,
  sleep and Workspace, but no hunk here adds imports for them -- presumably
  they are already imported above line 9 of tests/conftest.py; verify.
* NOTE(review): this file had lost its line structure. Line breaks, leading
  whitespace, the Makefile tab and blank context lines were reconstructed so
  that every hunk matches its "@@" line counts and the diffstat. If a context
  line does not match the target tree byte for byte, retry with
  `git apply --ignore-whitespace`.

 Makefile                |  2 +-
 tests/conftest.py       | 32 ++++++++++++++++++++++++++++----
 tests/test_binarize.py  | 17 +++++++++--------
 tests/test_recognize.py |  5 +++--
 tests/test_segment.py   | 19 ++++++++++---------
 5 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index 5acb886..934202c 100644
--- a/Makefile
+++ b/Makefile
@@ -68,7 +68,7 @@ docker:
 
 # Run test
 test: tests/assets
-	$(PYTHON) -m pytest tests $(PYTEST_ARGS)
+	$(PYTHON) -m pytest tests --durations=0 $(PYTEST_ARGS)
 
 #
 # Assets
diff --git a/tests/conftest.py b/tests/conftest.py
index 3b8cd32..2f62241 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,16 +9,41 @@ from .assets import assets
 
 
 
-@pytest.fixture
-def workspace(tmpdir, pytestconfig):
+CONFIGS = ['', 'pageparallel', 'metscache', 'pageparallel+metscache']
+
+@pytest.fixture(params=CONFIGS)
+def workspace(tmpdir, pytestconfig, request):
     def _make_workspace(workspace_path):
         initLogging()
         if pytestconfig.getoption('verbose') > 0:
             setOverrideLogLevel('DEBUG')
         with pushd_popd(tmpdir):
-            yield Resolver().workspace_from_url(workspace_path, dst_dir=tmpdir, download=True)
+            directory = str(tmpdir)
+            resolver = Resolver()
+            workspace = resolver.workspace_from_url(workspace_path, dst_dir=directory, download=True)
+            if 'metscache' in request.param:
+                config.OCRD_METS_CACHING = True
+                print("enabled METS caching")
+            if 'pageparallel' in request.param:
+                config.OCRD_MAX_PARALLEL_PAGES = 4
+                print("enabled page-parallel processing")
+                def _start_mets_server(*args, **kwargs):
+                    print("running with METS server")
+                    server = OcrdMetsServer(*args, **kwargs)
+                    server.startup()
+                process = Process(target=_start_mets_server,
+                                  kwargs={'workspace': workspace, 'url': 'mets.sock'})
+                process.start()
+                sleep(1)
+                workspace = Workspace(resolver, directory, mets_server_url='mets.sock')
+                yield {'workspace': workspace, 'mets_server_url': 'mets.sock'}
+                process.terminate()
+            else:
+                yield {'workspace': workspace}
+        config.reset_defaults()
     return _make_workspace
 
+
 @pytest.fixture
 def workspace_manifesto(workspace):
     yield from workspace(assets.path_to('communist_manifesto/data/mets.xml'))
@@ -34,4 +59,3 @@ def workspace_aufklaerung_region(workspace):
 @pytest.fixture
 def workspace_sbb(workspace):
     yield from workspace(assets.url_of('SBB0000F29300010000/data/mets_one_file.xml'))
-
diff --git a/tests/test_binarize.py b/tests/test_binarize.py
index 4396aec..2f8a522 100644
--- a/tests/test_binarize.py
+++ b/tests/test_binarize.py
@@ -2,41 +2,42 @@
 
 import json
 
-from .assets import assets
-
 from ocrd import run_processor
 from ocrd_kraken.binarize import KrakenBinarize
 
+from .assets import assets
+
+
 PARAM_JSON = assets.url_of('param-binarize.json')
 
 def test_param_json(workspace_sbb):
-    ws = workspace_sbb
     run_processor(KrakenBinarize,
-                  workspace=ws,
                   input_file_grp="OCR-D-IMG",
                   output_file_grp="OCR-D-BIN-KRAKEN",
                   parameter=json.load(open(PARAM_JSON)),
+                  **workspace_sbb,
     )
+    ws = workspace_sbb['workspace']
     ws.save_mets()
 
 def test_binarize_regions(workspace_aufklaerung):
-    ws = workspace_aufklaerung
     run_processor(KrakenBinarize,
-                  workspace=ws,
                   input_file_grp="OCR-D-GT-PAGE",
                   output_file_grp="OCR-D-BIN-KRAKEN",
                   parameter={'level-of-operation': 'region'},
+                  **workspace_aufklaerung,
     )
+    ws = workspace_aufklaerung['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)
 
 def test_binarize_lines(workspace_aufklaerung):
-    ws = workspace_aufklaerung
     run_processor(KrakenBinarize,
-                  workspace=ws,
                   input_file_grp="OCR-D-GT-PAGE",
                   output_file_grp="OCR-D-BIN-KRAKEN",
                   parameter={'level-of-operation': 'line'},
+                  **workspace_aufklaerung,
     )
+    ws = workspace_aufklaerung['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)
diff --git a/tests/test_recognize.py b/tests/test_recognize.py
index 290d7d0..eef425a 100644
--- a/tests/test_recognize.py
+++ b/tests/test_recognize.py
@@ -3,12 +3,13 @@
 from ocrd import run_processor
 from ocrd_kraken.recognize import KrakenRecognize
 
+
 def test_recognize(workspace_manifesto):
-    ws = workspace_manifesto
     run_processor(KrakenRecognize,
-                  workspace=ws,
                   input_file_grp="OCR-D-SEG-KRAKEN",
                   output_file_grp="OCR-D-OCR-KRAKEN",
+                  **workspace_manifesto,
     )
+    ws = workspace_manifesto['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)
diff --git a/tests/test_segment.py b/tests/test_segment.py
index 27dc2d7..66a8ac6 100644
--- a/tests/test_segment.py
+++ b/tests/test_segment.py
@@ -3,36 +3,37 @@
 from ocrd import run_processor
 from ocrd_kraken.segment import KrakenSegment
 
+
 def test_run_blla(workspace_manifesto):
-    ws = workspace_manifesto
     run_processor(KrakenSegment,
-                  workspace=ws,
                   input_file_grp="OCR-D-IMG-BIN",
                   output_file_grp="OCR-D-SEG-LINE-KRAKEN",
-                  parameter={'maxcolseps': 0, 'use_legacy': False}
+                  parameter={'maxcolseps': 0, 'use_legacy': False},
+                  **workspace_manifesto,
     )
+    ws = workspace_manifesto['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)
 
 def test_run_blla_regionlevel(workspace_aufklaerung_region):
-    ws = workspace_aufklaerung_region
     run_processor(KrakenSegment,
-                  workspace=ws,
                   input_file_grp="OCR-D-GT-SEG-REGION",
                   output_file_grp="OCR-D-SEG-LINE-KRAKEN",
                   page_id="phys_0005",
-                  parameter={'maxcolseps': 0, 'use_legacy': False}
+                  parameter={'maxcolseps': 0, 'use_legacy': False},
+                  **workspace_aufklaerung_region,
     )
+    ws = workspace_aufklaerung_region['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)
 
 def test_run_legacy(workspace_manifesto):
-    ws = workspace_manifesto
     run_processor(KrakenSegment,
-                  workspace=ws,
                   input_file_grp="OCR-D-IMG-BIN",
                   output_file_grp="OCR-D-SEG-LINE-KRAKEN",
-                  parameter={'maxcolseps': 0, 'use_legacy': True}
+                  parameter={'maxcolseps': 0, 'use_legacy': True},
+                  **workspace_manifesto,
     )
+    ws = workspace_manifesto['workspace']
     ws.save_mets()
     # FIXME: add result assertions (find_files, parsing PAGE etc)