Skip to content

Commit

Permalink
tests: also run with METS server + page-parallel processing, and with METS caching
Browse files (browse the repository at this point in the history)
  • Loading branch information
bertsky committed Aug 30, 2024
1 parent 316eedb commit 43c600f
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ docker:

# Run test
test: tests/assets
$(PYTHON) -m pytest tests $(PYTEST_ARGS)
$(PYTHON) -m pytest tests --durations=0 $(PYTEST_ARGS)

#
# Assets
Expand Down
32 changes: 28 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,41 @@

from .assets import assets

@pytest.fixture
def workspace(tmpdir, pytestconfig):
CONFIGS = ['', 'pageparallel', 'metscache', 'pageparallel+metscache']

@pytest.fixture(params=CONFIGS)
def workspace(tmpdir, pytestconfig, request):
def _make_workspace(workspace_path):
initLogging()
if pytestconfig.getoption('verbose') > 0:
setOverrideLogLevel('DEBUG')
with pushd_popd(tmpdir):
yield Resolver().workspace_from_url(workspace_path, dst_dir=tmpdir, download=True)
directory = str(tmpdir)
resolver = Resolver()
workspace = resolver.workspace_from_url(workspace_path, dst_dir=directory, download=True)
if 'metscache' in request.param:
config.OCRD_METS_CACHING = True
print("enabled METS caching")
if 'pageparallel' in request.param:
config.OCRD_MAX_PARALLEL_PAGES = 4
print("enabled page-parallel processing")
def _start_mets_server(*args, **kwargs):
print("running with METS server")
server = OcrdMetsServer(*args, **kwargs)
server.startup()
process = Process(target=_start_mets_server,
kwargs={'workspace': workspace, 'url': 'mets.sock'})
process.start()
sleep(1)
workspace = Workspace(resolver, directory, mets_server_url='mets.sock')
yield {'workspace': workspace, 'mets_server_url': 'mets.sock'}
process.terminate()
else:
yield {'workspace': workspace}
config.reset_defaults()
return _make_workspace


@pytest.fixture
def workspace_manifesto(workspace):
yield from workspace(assets.path_to('communist_manifesto/data/mets.xml'))
Expand All @@ -34,4 +59,3 @@ def workspace_aufklaerung_region(workspace):
@pytest.fixture
def workspace_sbb(workspace):
yield from workspace(assets.url_of('SBB0000F29300010000/data/mets_one_file.xml'))

17 changes: 9 additions & 8 deletions tests/test_binarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,42 @@

import json

from .assets import assets

from ocrd import run_processor
from ocrd_kraken.binarize import KrakenBinarize

from .assets import assets


PARAM_JSON = assets.url_of('param-binarize.json')

def test_param_json(workspace_sbb):
ws = workspace_sbb
run_processor(KrakenBinarize,
workspace=ws,
input_file_grp="OCR-D-IMG",
output_file_grp="OCR-D-BIN-KRAKEN",
parameter=json.load(open(PARAM_JSON)),
**workspace_sbb,
)
ws = workspace_sbb['workspace']
ws.save_mets()

def test_binarize_regions(workspace_aufklaerung):
ws = workspace_aufklaerung
run_processor(KrakenBinarize,
workspace=ws,
input_file_grp="OCR-D-GT-PAGE",
output_file_grp="OCR-D-BIN-KRAKEN",
parameter={'level-of-operation': 'region'},
**workspace_aufklaerung,
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

def test_binarize_lines(workspace_aufklaerung):
ws = workspace_aufklaerung
run_processor(KrakenBinarize,
workspace=ws,
input_file_grp="OCR-D-GT-PAGE",
output_file_grp="OCR-D-BIN-KRAKEN",
parameter={'level-of-operation': 'line'},
**workspace_aufklaerung,
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
5 changes: 3 additions & 2 deletions tests/test_recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from ocrd import run_processor
from ocrd_kraken.recognize import KrakenRecognize


def test_recognize(workspace_manifesto):
ws = workspace_manifesto
run_processor(KrakenRecognize,
workspace=ws,
input_file_grp="OCR-D-SEG-KRAKEN",
output_file_grp="OCR-D-OCR-KRAKEN",
**workspace_manifesto,
)
ws = workspace_manifesto['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
19 changes: 10 additions & 9 deletions tests/test_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,37 @@
from ocrd import run_processor
from ocrd_kraken.segment import KrakenSegment


def test_run_blla(workspace_manifesto):
ws = workspace_manifesto
run_processor(KrakenSegment,
workspace=ws,
input_file_grp="OCR-D-IMG-BIN",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
parameter={'maxcolseps': 0, 'use_legacy': False}
parameter={'maxcolseps': 0, 'use_legacy': False},
**workspace_manifesto,
)
ws = workspace_manifesto['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

def test_run_blla_regionlevel(workspace_aufklaerung_region):
ws = workspace_aufklaerung_region
run_processor(KrakenSegment,
workspace=ws,
input_file_grp="OCR-D-GT-SEG-REGION",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
page_id="phys_0005",
parameter={'maxcolseps': 0, 'use_legacy': False}
parameter={'maxcolseps': 0, 'use_legacy': False},
**workspace_aufklaerung_region,
)
ws = workspace_aufklaerung_region['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

def test_run_legacy(workspace_manifesto):
ws = workspace_manifesto
run_processor(KrakenSegment,
workspace=ws,
input_file_grp="OCR-D-IMG-BIN",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
parameter={'maxcolseps': 0, 'use_legacy': True}
parameter={'maxcolseps': 0, 'use_legacy': True},
**workspace_manifesto,
)
ws = workspace_manifesto['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

0 comments on commit 43c600f

Please sign in to comment.