-
Notifications
You must be signed in to change notification settings - Fork 11
/
Dockerfile
70 lines (61 loc) · 2.48 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
FROM docker.io/ocrd/core:v2.67.2 AS base
# set proper locales
# (key=value form; the space-separated `ENV key value` syntax is deprecated)
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# image metadata for ocrd-tesserocr
# VCS_REF and BUILD_DATE have no defaults, so they expand to empty strings
# unless passed with --build-arg.
# NOTE(review): these values typically change on every build and ARGs are
# visible to all later RUN steps, which can defeat layer caching - consider
# moving this section towards the end of the file.
ARG VCS_REF
ARG BUILD_DATE
LABEL \
    maintainer="https://ocr-d.de/kontakt" \
    org.label-schema.vcs-ref="$VCS_REF" \
    org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_tesserocr" \
    org.label-schema.build-date="$BUILD_DATE"
# make Python use UTF-8 on its standard streams
# (key=value form; the space-separated `ENV key value` syntax is deprecated)
ENV PYTHONIOENCODING=utf8
# set frontend non-interactive to silence interactive tzdata config
# (declared as ARG, so it applies during the build only and is not
# persisted into the environment of running containers)
ARG DEBIAN_FRONTEND=noninteractive
# set proper date and timezone in container
# (one RUN: the three steps are a single logical operation and should
# form a single layer)
RUN echo "Europe/Berlin" > /etc/timezone \
    && ln -sf /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && dpkg-reconfigure -f noninteractive tzdata
# diagnostic output - check timezone settings
# RUN cat /etc/timezone
# avoid HOME/.local/share (hard to predict USER here)
# so let XDG_DATA_HOME coincide with fixed system location
# (can still be overridden by derived stages)
ENV XDG_DATA_HOME=/usr/local/share \
    XDG_CONFIG_HOME=/usr/local/share/ocrd-resources
# must be a separate instruction: variables set by an ENV instruction are
# only available for substitution in the instructions that follow it
ENV TESSDATA_PREFIX=$XDG_DATA_HOME/tessdata
# stage everything the Makefile-driven build needs in a dedicated build directory
WORKDIR /build/ocrd_tesserocr
# single files, each placed directly into the build directory under its basename
COPY setup.py \
     ocrd_tesserocr/ocrd-tool.json \
     README.md \
     requirements.txt \
     requirements_test.txt \
     .gitmodules \
     Makefile \
     ./
# directory trees: git metadata, the Python package, and the sources under
# repo/ (presumably the git submodules listed in .gitmodules - confirm)
COPY .git .git
COPY ocrd_tesserocr ocrd_tesserocr
COPY repo/tesserocr repo/tesserocr
COPY repo/tesseract repo/tesseract
# Build and install in a single layer, so that everything removed at the end
# (build tree, build-only packages, apt metadata) never persists in the image.
# The make targets are defined in the copied Makefile; judging by their names
# they install system packages, Tesseract (plus training tools) and the Python
# package with its dependencies - confirm against the Makefile.
# After the build:
#  - delete the build tree
#  - remove the compiler toolchain and dev headers
#  - drop the apt package cache and index files, which would otherwise stay
#    in this layer for good
RUN make deps-ubuntu \
    && make -j4 install-tesseract \
    && make -j4 install-tesseract-training \
    && make deps install \
    && rm -rf /build/ocrd_tesserocr \
    && apt-get -y remove --auto-remove g++ libtesseract-dev make \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# preinstall the Fraktur and deu models for ocrd-tesserocr-recognize, then
# clean possibly created log-files/dirs of ocrd_network logger to prevent permission problems
RUN ocrd resmgr download ocrd-tesserocr-recognize Fraktur.traineddata \
    && ocrd resmgr download ocrd-tesserocr-recognize deu.traineddata \
    && rm -rf /tmp/ocrd_*
# as discussed in ocrd_all#378, we do not want to manage more than one resource location
# to mount for model persistence;
# with named volumes, the preinstalled models will be copied to the host and complemented
# by downloaded models;
# tessdata is the only problematic module location
#
# steps (all in ONE layer - moving a directory in a later layer stores its
# contents again, so separate RUNs would duplicate the model data):
#  1. create the resource directory
#  2. move tessdata into it as ocrd-tesserocr-recognize
#  3. symlink the old tessdata path to the new location
#  4. finally, alias/symlink all ocrd-resources to /models for shorter mount commands
RUN mkdir -p "$XDG_CONFIG_HOME" \
    && mv "$TESSDATA_PREFIX" "$XDG_CONFIG_HOME/ocrd-tesserocr-recognize" \
    && ln -s "$XDG_CONFIG_HOME/ocrd-tesserocr-recognize" "$TESSDATA_PREFIX" \
    && mv "$XDG_CONFIG_HOME" /models \
    && ln -s /models "$XDG_CONFIG_HOME"
# containers start in /data (WORKDIR creates the directory if it is missing)
WORKDIR /data
# declare /data as a mount point; kept as the last instruction, since build
# steps that write to a path after its VOLUME declaration are discarded
VOLUME ["/data"]