-
Notifications
You must be signed in to change notification settings - Fork 33
/
Dockerfile
62 lines (43 loc) · 2.2 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Base image for the Hadoop/Spark node: Ubuntu 16.04 (Xenial).
FROM ubuntu:16.04
# No later USER instruction appears in this file, so every following build
# step and the container's entrypoint run as root.
USER root
# Install every OS package in ONE layer:
#   * `apt-get update` shares a layer with the installs that use it, so a
#     cached (stale) package index is never reused for a later install;
#   * the package index is deleted in the same layer, so it does not end up
#     in the final image.
# Recommended packages are deliberately still installed: wget relies on
# ca-certificates for the HTTPS downloads performed later in this file.
RUN apt-get update \
 && apt-get -y dist-upgrade \
 && apt-get install -y \
        default-jdk \
        openssh-server \
        scala \
        vim \
        wget \
        zip \
 && rm -rf /var/lib/apt/lists/*
# JDK location exported to all later build steps and to the running container.
# NOTE(review): assumes `default-jdk` resolves to OpenJDK 8 on amd64 for this
# base image — confirm if the base image or architecture changes.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Create a passphrase-less RSA key pair for the build user and authorise its
# public half for logins to that same account.
# NOTE(review): presumably this lets containers started from this image SSH
# into one another without a password; the private key is baked into the image.
RUN ssh-keygen -t rsa -P "" -f "$HOME/.ssh/id_rsa" \
 && cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"
# Download Hadoop 2.7.3, unpack it to /usr/local/hadoop and delete the tarball,
# all in one layer so the archive never persists in the image.
# Fetched over HTTPS (same as the Spark download) and extracted via absolute
# paths so the step does not depend on the current working directory.
# TODO(review): verify the archive against the published checksum/signature.
RUN wget -q -O /hadoop.tar.gz https://archive.apache.org/dist/hadoop/core/hadoop-2.7.3/hadoop-2.7.3.tar.gz \
 && tar -xzf /hadoop.tar.gz -C / \
 && mv /hadoop-2.7.3 /usr/local/hadoop \
 && rm /hadoop.tar.gz
# Download Spark 2.4.1 (pre-built for Hadoop 2.7), unpack it to
# /usr/local/spark and delete the tarball.
# Everything happens in a single RUN: removing the archive in a later layer
# would not shrink the image, because the layer that downloaded it would
# still contain the file.
# TODO(review): verify the archive against the published checksum/signature.
RUN wget -q -O /spark.tar.gz https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz \
 && tar -xzf /spark.tar.gz -C / \
 && mv /spark-2.4.1-bin-hadoop2.7 /usr/local/spark \
 && rm /spark.tar.gz
# Install locations of the Hadoop and Spark distributions.
ENV HADOOP_HOME=/usr/local/hadoop \
    SPARK_HOME=/usr/local/spark
# Expose the bin/ and sbin/ directories of both distributions on PATH.
# This must be a separate ENV instruction: variables set within one ENV
# instruction are not visible to other values in that same instruction.
# (The last entry used to read "$SPARK_HOME:sbin", which added the Spark root
# and a relative "sbin" instead of $SPARK_HOME/sbin.)
ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin
# Create the namenode and datanode directories under $HADOOP_HOME/hdfs.
# NOTE(review): presumably these are the storage paths referenced by
# hdfs-site.xml — confirm against the config file.
RUN mkdir -p \
        "$HADOOP_HOME/hdfs/namenode" \
        "$HADOOP_HOME/hdfs/datanode"
# Stage the build context's config/ directory in /tmp, then move each file to
# the location where SSH, Hadoop or Spark looks for it.
COPY config/ /tmp/
# Notes on the non-obvious steps below:
#   * mapred-site.xml is installed as the .template file and then copied to
#     mapred-site.xml, so both names carry the same content.
#   * the same `slaves` file is used for Hadoop (copied) and Spark (moved).
#   * Spark only loads defaults from conf/spark-defaults.conf (hyphenated), so
#     the shipped spark.defaults.conf is installed under that name; a copy is
#     also kept at the old dotted path in case anything references it.
RUN mv /tmp/ssh_config $HOME/.ssh/config \
 && mv /tmp/hadoop-env.sh $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
 && mv /tmp/core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml \
 && mv /tmp/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml \
 && mv /tmp/mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml.template \
 && cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml \
 && mv /tmp/yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml \
 && cp /tmp/slaves $HADOOP_HOME/etc/hadoop/slaves \
 && mv /tmp/slaves $SPARK_HOME/conf/slaves \
 && mv /tmp/spark/spark-env.sh $SPARK_HOME/conf/spark-env.sh \
 && mv /tmp/spark/log4j.properties $SPARK_HOME/conf/log4j.properties \
 && cp /tmp/spark/spark.defaults.conf $SPARK_HOME/conf/spark-defaults.conf \
 && mv /tmp/spark/spark.defaults.conf $SPARK_HOME/conf/spark.defaults.conf
# Ship the service start-up script alongside the Hadoop installation.
# COPY is used instead of ADD: this is a plain local file, so none of ADD's
# extra behaviours (archive extraction, URL fetching) are wanted.
COPY scripts/spark-services.sh $HADOOP_HOME/spark-services.sh
# Recursively set mode 744 (rwxr--r--) on the whole Hadoop tree, which makes
# the script above executable for its owner. Note that 744 on directories
# leaves group/other without the search bit.
RUN chmod 744 -R $HADOOP_HOME
# Format the HDFS NameNode at build time, so the formatted metadata is part of
# the image rather than created on first container start.
RUN "${HADOOP_HOME}/bin/hdfs" namenode -format
# EXPOSE only documents the ports; it does not publish them.
# NOTE(review): the groupings below follow the Hadoop 2.x default port
# assignments; the real bindings depend on the *-site.xml files copied into
# the image — confirm against those.

# HDFS (NameNode / DataNode / SecondaryNameNode)
EXPOSE 50010 50020 50070 50075 50090 8020 9000
# MapReduce JobHistory server
EXPOSE 10020 19888
# YARN (ResourceManager / NodeManager)
EXPOSE 8030 8031 8032 8033 8040 8042 8088
# Other ports (purpose not evident from this file — presumably Spark and
# auxiliary services). The duplicate 9000 entry was dropped; it is already
# exposed with the HDFS ports.
EXPOSE 49707 2122 7001 7002 7003 7004 7005 7006 7007 8888
# Start the SSH daemon, change into the Spark directory and hand control to an
# interactive bash.
# JSON (exec) form with an explicit `sh -c` keeps the runtime expansion of
# $SPARK_HOME that the shell form provided, while `exec` makes bash replace the
# wrapper shell so it receives signals directly instead of sitting behind sh.
ENTRYPOINT ["/bin/sh", "-c", "service ssh start; cd \"$SPARK_HOME\"; exec bash"]