Skip to content

Commit

Permalink
tests: use workspace manifesto→aufklaerung (1→2 pages), binarize ad hoc where needed
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Aug 30, 2024
1 parent 32b2e9c commit c73b3ef
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 13 deletions.
17 changes: 13 additions & 4 deletions tests/test_recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,23 @@

from ocrd import run_processor
from ocrd_kraken.recognize import KrakenRecognize
from ocrd_kraken.binarize import KrakenBinarize


def test_recognize(workspace_manifesto):
def test_recognize(workspace_aufklaerung):
# some models (like default en) require binarized images
run_processor(KrakenBinarize,
input_file_grp="OCR-D-GT-PAGE",
output_file_grp="OCR-D-GT-PAGE-BIN",
**workspace_aufklaerung,
)
run_processor(KrakenRecognize,
input_file_grp="OCR-D-SEG-KRAKEN",
# re-use layout, overwrite text:
input_file_grp="OCR-D-GT-PAGE-BIN",
output_file_grp="OCR-D-OCR-KRAKEN",
**workspace_manifesto,
parameter={'overwrite_text': True},
**workspace_aufklaerung,
)
ws = workspace_manifesto['workspace']
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
27 changes: 18 additions & 9 deletions tests/test_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,25 @@

from ocrd import run_processor
from ocrd_kraken.segment import KrakenSegment
from ocrd_kraken.binarize import KrakenBinarize


def test_run_blla(workspace_manifesto):
def test_run_blla(workspace_aufklaerung):
run_processor(KrakenSegment,
input_file_grp="OCR-D-IMG-BIN",
input_file_grp="OCR-D-IMG",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
parameter={'maxcolseps': 0, 'use_legacy': False},
**workspace_manifesto,
**workspace_aufklaerung,
)
ws = workspace_manifesto['workspace']
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

def test_run_blla_regionlevel(workspace_aufklaerung_region):
run_processor(KrakenSegment,
input_file_grp="OCR-D-GT-SEG-REGION",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
# only 1 page (takes 3min per page without GPU)
page_id="phys_0005",
parameter={'maxcolseps': 0, 'use_legacy': False},
**workspace_aufklaerung_region,
Expand All @@ -27,13 +29,20 @@ def test_run_blla_regionlevel(workspace_aufklaerung_region):
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

def test_run_legacy(workspace_manifesto):
def test_run_legacy(workspace_aufklaerung):
# legacy segmentation requires binarized images
run_processor(KrakenBinarize,
input_file_grp="OCR-D-GT-PAGE",
output_file_grp="OCR-D-GT-PAGE-BIN",
**workspace_aufklaerung,
)
run_processor(KrakenSegment,
input_file_grp="OCR-D-IMG-BIN",
# overwrite layout:
input_file_grp="OCR-D-GT-PAGE-BIN",
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
parameter={'maxcolseps': 0, 'use_legacy': True},
**workspace_manifesto,
parameter={'maxcolseps': 0, 'use_legacy': True, 'overwrite_segments': True},
**workspace_aufklaerung,
)
ws = workspace_manifesto['workspace']
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)

0 comments on commit c73b3ef

Please sign in to comment.