From c73b3efa6dd771db06d207303370fb0c71395fa4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 30 Aug 2024 13:57:04 +0200 Subject: [PATCH] =?UTF-8?q?tests:=20use=20workspace=20manifesto=E2=86=92au?= =?UTF-8?q?fklaerung=20(1=E2=86=922=20pages),=20binarize=20ad=20hoc=20wher?= =?UTF-8?q?e=20needed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_recognize.py | 17 +++++++++++++---- tests/test_segment.py | 27 ++++++++++++++++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/tests/test_recognize.py b/tests/test_recognize.py index eef425a..3ebeb71 100644 --- a/tests/test_recognize.py +++ b/tests/test_recognize.py @@ -2,14 +2,23 @@ from ocrd import run_processor from ocrd_kraken.recognize import KrakenRecognize +from ocrd_kraken.binarize import KrakenBinarize -def test_recognize(workspace_manifesto): +def test_recognize(workspace_aufklaerung): + # some models (like default en) require binarized images + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-GT-PAGE-BIN", + **workspace_aufklaerung, + ) run_processor(KrakenRecognize, - input_file_grp="OCR-D-SEG-KRAKEN", + # re-use layout, overwrite text: + input_file_grp="OCR-D-GT-PAGE-BIN", output_file_grp="OCR-D-OCR-KRAKEN", - **workspace_manifesto, + parameter={'overwrite_text': True}, + **workspace_aufklaerung, ) - ws = workspace_manifesto['workspace'] + ws = workspace_aufklaerung['workspace'] ws.save_mets() # FIXME: add result assertions (find_files, parsing PAGE etc) diff --git a/tests/test_segment.py b/tests/test_segment.py index 66a8ac6..ec7e1ee 100644 --- a/tests/test_segment.py +++ b/tests/test_segment.py @@ -2,16 +2,17 @@ from ocrd import run_processor from ocrd_kraken.segment import KrakenSegment +from ocrd_kraken.binarize import KrakenBinarize -def test_run_blla(workspace_manifesto): +def test_run_blla(workspace_aufklaerung): run_processor(KrakenSegment, - input_file_grp="OCR-D-IMG-BIN", + input_file_grp="OCR-D-IMG", output_file_grp="OCR-D-SEG-LINE-KRAKEN", parameter={'maxcolseps': 0, 'use_legacy': False}, - **workspace_manifesto, + **workspace_aufklaerung, ) - ws = workspace_manifesto['workspace'] + ws = workspace_aufklaerung['workspace'] ws.save_mets() # FIXME: add result assertions (find_files, parsing PAGE etc) @@ -19,6 +20,7 @@ def test_run_blla_regionlevel(workspace_aufklaerung_region): run_processor(KrakenSegment, input_file_grp="OCR-D-GT-SEG-REGION", output_file_grp="OCR-D-SEG-LINE-KRAKEN", + # only 1 page (takes 3min per page without GPU) page_id="phys_0005", parameter={'maxcolseps': 0, 'use_legacy': False}, **workspace_aufklaerung_region, @@ -27,13 +29,20 @@ def test_run_blla_regionlevel(workspace_aufklaerung_region): ws.save_mets() # FIXME: add result assertions (find_files, parsing PAGE etc) -def test_run_legacy(workspace_manifesto): +def test_run_legacy(workspace_aufklaerung): + # legacy segmentation requires binarized images + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-GT-PAGE-BIN", + **workspace_aufklaerung, + ) run_processor(KrakenSegment, - input_file_grp="OCR-D-IMG-BIN", + # overwrite layout: + input_file_grp="OCR-D-GT-PAGE-BIN", output_file_grp="OCR-D-SEG-LINE-KRAKEN", - parameter={'maxcolseps': 0, 'use_legacy': True}, - **workspace_manifesto, + parameter={'maxcolseps': 0, 'use_legacy': True, 'overwrite_segments': True}, + **workspace_aufklaerung, ) - ws = workspace_manifesto['workspace'] + ws = workspace_aufklaerung['workspace'] ws.save_mets() # FIXME: add result assertions (find_files, parsing PAGE etc)