Skip to content

Commit

Permalink
Merge pull request #86 from qurator-spk/eynollah_light
Browse files (browse the repository at this point in the history)
Eynollah light integration
  • Loading branch information
cneud authored May 13, 2023
2 parents 52d2e0b + 48f2ce6 commit fd9431a
Show file tree
Hide file tree
Showing 10 changed files with 1,376 additions and 595 deletions.
16 changes: 11 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ version: 2
jobs:

build-python37:
docker:
- image: python:3.7
machine:
- image: ubuntu-2004:2023.02.1

steps:
- checkout
- restore_cache:
Expand All @@ -16,12 +17,15 @@ jobs:
paths:
models_eynollah.tar.gz
models_eynollah
- run:
name: "Set Python Version"
command: pyenv install -s 3.7.16 && pyenv global 3.7.16
- run: make install
- run: make smoke-test

build-python38:
docker:
- image: python:3.8
machine:
- image: ubuntu-2004:2023.02.1
steps:
- checkout
- restore_cache:
Expand All @@ -33,13 +37,15 @@ jobs:
paths:
models_eynollah.tar.gz
models_eynollah
- run:
name: "Set Python Version"
command: pyenv install -s 3.8.16 && pyenv global 3.8.16
- run: make install
- run: make smoke-test

workflows:
version: 2
build:
jobs:
#- build-python36
- build-python37
- build-python38
4 changes: 2 additions & 2 deletions .github/workflows/test-eynollah.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7'] # '3.8'
python-version: ['3.7', '3.8']

steps:
- uses: actions/checkout@v2
Expand All @@ -33,4 +33,4 @@ jobs:
pip install .
pip install -r requirements-test.txt
- name: Test with pytest
run: make test
run: make test
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ help:
models: models_eynollah

models_eynollah: models_eynollah.tar.gz
tar xf models_eynollah.tar.gz
# tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/'
# tar xf models_eynollah_renamed.tar.gz
tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/'

models_eynollah.tar.gz:
wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz'
# wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz'
# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz'
wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz'

# Install with pip
install:
Expand Down
227 changes: 69 additions & 158 deletions README.md

Large diffs are not rendered by default.

61 changes: 52 additions & 9 deletions qurator/eynollah/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"-i",
help="image filename",
type=click.Path(exists=True, dir_okay=False),
required=True,
)
@click.option(
"--out",
Expand All @@ -19,6 +18,12 @@
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--dir_in",
"-di",
help="directory of images",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--model",
"-m",
Expand Down Expand Up @@ -50,6 +55,12 @@
help="if a directory is given, all plots needed for documentation will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_page",
"-sp",
help="if a directory is given, page crop of image will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--enable-plotting/--disable-plotting",
"-ep/-noep",
Expand All @@ -66,7 +77,13 @@
"--curved-line/--no-curvedline",
"-cl/-nocl",
is_flag=True,
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
)
@click.option(
"--textline_light/--no-textline_light",
"-tll/-notll",
is_flag=True,
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline with a faster method.",
)
@click.option(
"--full-layout/--no-full-layout",
Expand All @@ -93,11 +110,23 @@
help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
)
@click.option(
"--headers-off/--headers-on",
"--headers_off/--headers-on",
"-ho/-noho",
is_flag=True,
help="if this parameter set to true, this tool would ignore headers role in reading order",
)
@click.option(
"--light_version/--original",
"-light/-org",
is_flag=True,
help="if this parameter set to true, this tool would use lighter version",
)
@click.option(
"--ignore_page_extraction/--extract_page_included",
"-ipe/-epi",
is_flag=True,
help="if this parameter set to true, this tool would ignore page extraction",
)
@click.option(
"--log-level",
"-l",
Expand All @@ -107,49 +136,63 @@
def main(
image,
out,
dir_in,
model,
save_images,
save_layout,
save_deskewed,
save_all,
save_page,
enable_plotting,
allow_enhancement,
curved_line,
textline_light,
full_layout,
tables,
input_binary,
allow_scaling,
headers_off,
light_version,
ignore_page_extraction,
log_level
):
if log_level:
setOverrideLogLevel(log_level)
initLogging()
if not enable_plotting and (save_layout or save_deskewed or save_all or save_images or allow_enhancement):
print("Error: You used one of -sl, -sd, -sa, -si or -ae but did not enable plotting with -ep")
if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep")
sys.exit(1)
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae")
sys.exit(1)
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images or allow_enhancement):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -si or -ae")
if textline_light and not light_version:
print('Error: You used -tll to enable light textline detection but -light is not enabled')
sys.exit(1)
eynollah = Eynollah(
image_filename=image,
dir_out=out,
dir_in=dir_in,
dir_models=model,
dir_of_cropped_images=save_images,
dir_of_layout=save_layout,
dir_of_deskewed=save_deskewed,
dir_of_all=save_all,
dir_save_page=save_page,
enable_plotting=enable_plotting,
allow_enhancement=allow_enhancement,
curved_line=curved_line,
textline_light=textline_light,
full_layout=full_layout,
tables=tables,
input_binary=input_binary,
allow_scaling=allow_scaling,
headers_off=headers_off,
light_version=light_version,
ignore_page_extraction=ignore_page_extraction,
)
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
eynollah.run()
#pcgts = eynollah.run()
##eynollah.writer.write_pagexml(pcgts)

# Script entry guard: invoke the CLI entry point `main` only when this module
# is executed directly, not when it is imported by another module.
if __name__ == "__main__":
main()
Loading

0 comments on commit fd9431a

Please sign in to comment.