DE최우형 - W4M2 #271

Open. Wants to merge 9 commits into base: DE최우형_W4
16 changes: 16 additions & 0 deletions .gitignore
@@ -158,3 +158,19 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.DS_Store
._.DS_Store
**/.DS_Store
**/._.DS_Store

.vscode
vscode
.vscode/
vscode/


.untracked
._untracked
**/untracked
**/._untracked
20 changes: 20 additions & 0 deletions missions/W2/M1_4/W2M1 - Multiprocessing: Pool.ipynb
@@ -0,0 +1,20 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#init"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
39 changes: 39 additions & 0 deletions missions/W4/M1/Dockerfile
@@ -0,0 +1,39 @@
# Base image
FROM ubuntu:22.04

# Install necessary packages
RUN apt-get update && \
apt-get install -y vim wget unzip ssh openjdk-8-jdk python3-pip rsync sudo net-tools

# Create Spark user
RUN useradd -ms /bin/bash spark
RUN echo "spark ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Set environment variables
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-arm64
ENV SPARK_HOME /usr/local/spark
ENV PATH $PATH:$SPARK_HOME/bin

# Copy and install Spark
COPY /untracked/spark-3.5.1-bin-hadoop3.tgz $SPARK_HOME/spark-3.5.1-bin-hadoop3.tgz
RUN tar -zxvf $SPARK_HOME/spark-3.5.1-bin-hadoop3.tgz -C $SPARK_HOME/
RUN mv $SPARK_HOME/spark-3.5.1-bin-hadoop3/* $SPARK_HOME/
RUN rm -rf $SPARK_HOME/spark-3.5.1-bin-hadoop3.tgz
RUN mkdir -p $SPARK_HOME/data
RUN chown -R spark:spark $SPARK_HOME
# Change user
USER spark

# Set environment variables in .bashrc
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64" >> ~/.bashrc && \
echo "export PATH=\$PATH:\$SPARK_HOME" >> ~/.bashrc && \
echo "export SPARK_HOME=/usr/local/spark" >> ~/.bashrc

# Copy Spark start scripts
COPY /start_script/start_spark.sh $SPARK_HOME/start_spark.sh
COPY /start_script/start_pi.sh $SPARK_HOME/start_pi.sh
RUN sudo chmod +x $SPARK_HOME/start_spark.sh
RUN sudo chmod +x $SPARK_HOME/start_pi.sh

# Set the container start command
CMD ["/usr/local/spark/start_spark.sh"]
3 changes: 3 additions & 0 deletions missions/W4/M1/README.md
@@ -0,0 +1,3 @@
# init

$SPARK_HOME/bin/spark-submit --master spark://spark-master:7077 $SPARK_HOME/examples/src/main/python/pi.py
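
A hedged usage example for submitting this job from the host rather than from inside a container, assuming the container name spark-master from docker-compose.yml:

docker exec -it spark-master /usr/local/spark/bin/spark-submit \
    --master spark://spark-master:7077 \
    /usr/local/spark/examples/src/main/python/pi.py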
10 changes: 10 additions & 0 deletions missions/W4/M1/build_and_run_hadoop_services.sh
@@ -0,0 +1,10 @@
#!/bin/bash

# Build Docker images
echo "Building Spark image..."
docker build -t spark .


# Start services using docker-compose
# echo "Starting Hadoop services..."
docker-compose up -d
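
For completeness, a sketch of verifying and tearing down the cluster afterwards (assuming the docker-compose.yml in this directory):

docker-compose ps                  # all three containers should be Up
docker-compose logs spark-master   # should show both workers registering
docker-compose down -v             # stop containers and remove the named volumes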
42 changes: 42 additions & 0 deletions missions/W4/M1/docker-compose.yml
@@ -0,0 +1,42 @@
version: '3'
services:
spark-master:
image: spark
container_name: spark-master
hostname: spark-master
ports:
- 8080:8080
volumes:
- spark-master-data:/usr/local/spark/data
environment:
- NODE_TYPE=master

spark-worker1:
image: spark
container_name: spark-worker1
hostname: spark-worker1
ports:
- 8081:8081
volumes:
- spark-worker1-data:/usr/local/spark/data
depends_on:
- spark-master
environment:
- NODE_TYPE=worker

spark-worker2:
image: spark
container_name: spark-worker2
hostname: spark-worker2
ports:
- 8082:8081
volumes:
- spark-worker2-data:/usr/local/spark/data
depends_on:
- spark-master
environment:
- NODE_TYPE=worker
volumes:
spark-master-data:
spark-worker1-data:
spark-worker2-data:
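
Once the stack is up, the mapped web UI ports can confirm that both workers registered with the master. A quick reachability check from the host (assuming the port mappings above):

curl -s http://localhost:8080 >/dev/null && echo "master UI reachable"
curl -s http://localhost:8081 >/dev/null && echo "worker1 UI reachable"
curl -s http://localhost:8082 >/dev/null && echo "worker2 UI reachable"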
19 changes: 19 additions & 0 deletions missions/W4/M1/start_script/start_pi.sh
@@ -0,0 +1,19 @@
#!/bin/bash

# Define the directory
LOG_DIR="/usr/local/spark/data/test_log"

# Create the directory if it does not exist
mkdir -p "$LOG_DIR"

# Get the current date and time
CURRENT_TIME=$(date "+%Y-%m-%d_%H-%M-%S")

# Define the log file with date and time
LOG_FILE="$LOG_DIR/PI_OUTPUT_$CURRENT_TIME.log"

# Start the Spark example job and redirect stdout and stderr to the log file
$SPARK_HOME/bin/spark-submit --master spark://spark-master:7077 $SPARK_HOME/examples/src/main/python/pi.py > "$LOG_FILE" 2>&1

# Also run the status API demo example and capture its output
$SPARK_HOME/bin/spark-submit --master spark://spark-master:7077 $SPARK_HOME/examples/src/main/python/status_api_demo.py > "$LOG_DIR/status_api_$CURRENT_TIME.log" 2>&1

# Template: enable event logging (hdfs://path_to_eventlog_directory and your_spark_job.py are placeholders to fill in)
$SPARK_HOME/bin/spark-submit --conf spark.eventLog.enabled=true --conf spark.eventLog.dir=hdfs://path_to_eventlog_directory your_spark_job.py
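
As a concrete variant of the event-log template above, a minimal sketch that writes event logs to a local directory inside the container instead of HDFS; the file:// path is an assumption for illustration, not part of this PR:

# Hypothetical local event-log directory (assumption, not in the PR)
mkdir -p /usr/local/spark/data/event_log
$SPARK_HOME/bin/spark-submit \
    --master spark://spark-master:7077 \
    --conf spark.eventLog.enabled=true \
    --conf spark.eventLog.dir=file:///usr/local/spark/data/event_log \
    $SPARK_HOME/examples/src/main/python/pi.py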
24 changes: 24 additions & 0 deletions missions/W4/M1/start_script/start_spark.sh
@@ -0,0 +1,24 @@
#!/bin/bash


# Run a different command depending on $NODE_TYPE
case $NODE_TYPE in
"master")
$SPARK_HOME/sbin/start-master.sh

# Keep the shell open
tail -f /dev/null
;;
"worker")
$SPARK_HOME/sbin/start-worker.sh spark-master:7077

# Keep the shell open
tail -f /dev/null
;;
*)
echo "Invalid NODE_TYPE: $NODE_TYPE"
exit 1
;;
esac
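
Because the Dockerfile's CMD runs this script, a single node can also be started by hand with NODE_TYPE overridden. A sketch, assuming the image is tagged spark and using a user-defined network so the worker can resolve the spark-master hostname:

docker network create spark-net
docker run -d --name spark-master --hostname spark-master --network spark-net \
    -e NODE_TYPE=master -p 8080:8080 spark
docker run -d --name spark-worker1 --network spark-net -e NODE_TYPE=worker spark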


1 change: 1 addition & 0 deletions missions/W4/M2/README.md
@@ -0,0 +1 @@
See the test.ipynb notebook file.