forked from JohnSnowLabs/spark-nlp-workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
74 lines (58 loc) · 2.18 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Base image: Ubuntu 18.04.
FROM ubuntu:18.04

# Account used to run the notebook server.
# HOME is derived from NB_USER, so it must live in a later ENV instruction:
# variable substitution inside one instruction only sees values set by
# earlier instructions.
ENV NB_USER=jovyan \
    NB_UID=1000
ENV HOME=/home/${NB_USER}

# Point both PySpark interpreter variables at python3.
ENV PYSPARK_PYTHON=python3 \
    PYSPARK_DRIVER_PYTHON=python3
# OS packages: download/archive tools, a C compiler with native headers,
# and Python 3 with pip.
# The package index is refreshed, used and removed within this single layer so
# the lists never end up stored in the image; any later apt step has to run its
# own `apt-get update`.
# DEBIAN_FRONTEND is set inline only, so it does not leak into the runtime
# environment of the container.
# NOTE(review): --no-install-recommends is deliberately not added here; it may
# drop packages that later pip builds rely on — confirm before slimming.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        bash \
        gcc \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libpng-dev \
        libzmq3-dev \
        pkg-config \
        python3 \
        python3-dev \
        python3-pip \
        rsync \
        software-properties-common \
        tar \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Create the notebook account with a fixed UID and password login disabled.
RUN adduser \
        --uid "${NB_UID}" \
        --gecos "Default user" \
        --disabled-password \
        "${NB_USER}"
# Java 8 JDK.
# This step used to pull oracle-java8-installer from the WebUpd8 PPA; that PPA
# has been discontinued, so the JDK now comes from Ubuntu's own archive.
ENV JAVA_VER=8
# NOTE: the JVM directory name contains the CPU architecture; this is the amd64
# path. The `test -x` below fails the build early if it does not exist.
ENV JAVA_HOME=/usr/lib/jvm/java-${JAVA_VER}-openjdk-amd64
# Only one JDK is installed, so the java alternatives already point at it and
# no update-java-alternatives call is needed.
# JAVA_HOME is also appended to ~/.bashrc, as before, for interactive shells.
# Package lists and temp files are removed in the same layer that created them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        openjdk-${JAVA_VER}-jdk-headless && \
    test -x "${JAVA_HOME}/bin/java" && \
    echo "export JAVA_HOME=${JAVA_HOME}" >> ~/.bashrc && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Python packages for the notebooks.
# pip is invoked as `python3 -m pip` so the freshly upgraded pip is the one
# that runs, independent of which pip3 wrapper script is first on PATH.
# --no-cache-dir on both commands keeps the download cache out of the layer.
# The notebook requirement is quoted so the shell cannot glob-expand `5.*`.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        Keras \
        matplotlib \
        'notebook==5.*' \
        numpy \
        pydot \
        pyspark \
        scikit-learn \
        scikit-spark \
        scipy \
        spark-nlp
# Download the glove.6B.100d archive and unpack it into ${HOME}/data/embeddings.
# ${HOME} is used instead of a hard-coded /home/jovyan so the path follows
# NB_USER. The archive is staged in /tmp and deleted in the same layer.
# --progress=dot:giga keeps wget's progress output from flooding the build log.
# Ownership is set here, in the layer that creates the files, so a later chown
# does not have to copy them into another layer.
RUN mkdir -p "${HOME}/data/embeddings" && \
    wget --progress=dot:giga -O /tmp/glove.6B.100d.zip \
        https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-resources/glove.6B.100d.zip && \
    unzip /tmp/glove.6B.100d.zip -d "${HOME}/data/embeddings" && \
    rm /tmp/glove.6B.100d.zip && \
    chown -R "${NB_UID}" "${HOME}/data"
# Put the repository content into ${HOME}.
# COPY creates missing destination directories itself, so no separate mkdir
# layers are needed. --chown assigns the files to the notebook user as they are
# written (a bare UID is used for the group as well), which avoids re-copying
# them into an extra layer just to change their owner.
COPY --chown=${NB_UID} data ${HOME}/data
COPY --chown=${NB_UID} jupyter ${HOME}/jupyter
COPY --chown=${NB_UID} strata ${HOME}/strata

# Hand over whatever under ${HOME} is still owned by another user (files
# written by earlier root steps). Only those entries are touched, so files that
# already belong to the notebook user are not duplicated into this layer.
# Root is selected explicitly because changing ownership requires it.
USER root
RUN find "${HOME}" ! -user "${NB_UID}" -exec chown -h "${NB_UID}" {} +

# Drop privileges and start in the user's home directory.
USER ${NB_USER}
WORKDIR ${HOME}
# Default command: start the notebook server listening on all interfaces.
# The port is spelled out so that it visibly matches EXPOSE; EXPOSE itself is
# documentation only and does not publish the port.
EXPOSE 8888
CMD ["jupyter", "notebook", "--ip", "0.0.0.0", "--port", "8888"]