Skip to content

Commit

Permalink
Merge pull request #66 from rg3515/main
Browse files Browse the repository at this point in the history
Cloudmask_v0.5
  • Loading branch information
laszewsk authored Sep 27, 2023
2 parents 80c779b + e07b3d7 commit 8bc75e5
Show file tree
Hide file tree
Showing 23 changed files with 2,914 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#

# ####################################
# Runtime Variable
# ####################################

# Epoch counts to run and the wall-clock limit used for each of them.
# timesArray[k] is the --time value paired with epochsArray[k], so both
# arrays must have the same number of entries.
#
# Full sweep (kept for reference):
# epochsArray=(1 5 10 20 30 50 80 100 200)
# timesArray=("00:30:00" "00:40:00" "00:50:00" "01:10:00" "01:30:00" "02:30:00" "3:00:00" "4:00:00" "13:00:00")
epochsArray=(200)
timesArray=("13:00:00")

# Number of times every epoch setting is submitted.
REPEAT=5

# GPU model requested in the Slurm script and written to the config file.
GPU="v100"
# GPU="a100"

# Template files; they are edited in place and then copied once per run.
slurm_script="tmptest-singularity.slurm"
config_file="config_simple.yaml"

# Directories that receive the generated per-run files.
# -p makes this a no-op (instead of an error) when the script is re-run
# and the directories already exist.
mkdir -p slurm_reproduce_files config_reproduce_files


#
# #####################################

# One-time edits applied to both templates before any job is generated:
# request exactly one GPU of the model named in ${GPU}.

# Slurm template: rewrite the --gres request.
sed -i "s/--gres=.*/--gres=gpu:${GPU}:1/" "$slurm_script"

# YAML config: record the card name and a GPU count of 1.
sed -i \
    -e "s/card_name.*/card_name: ${GPU}/" \
    -e "s/gpu_count.*/gpu_count: 1/" \
    "$config_file"



# Submit REPEAT rounds of jobs; every round submits one job per entry of
# epochsArray. sbatch only queues a job and returns, so nothing in this
# loop waits for a job to finish.

# Each epoch count needs a matching time limit; refuse to continue rather
# than write an empty --time= into the Slurm script.
if (( ${#epochsArray[@]} != ${#timesArray[@]} )); then
    echo "epochsArray and timesArray must have the same number of entries" >&2
    exit 1
fi

for ((i = 1; i <= REPEAT; i++)); do
    for j in "${!epochsArray[@]}"; do
        epochs="${epochsArray[$j]}"
        walltime="${timesArray[$j]}"

        # Per-run copies of the two templates.
        run_config="config_reproduce_files/config_simple_${epochs}_epochs_${i}.yaml"
        run_slurm="slurm_reproduce_files/simple_${epochs}_epochs_${i}.slurm"

        # Stamp the epoch count, job name and time limit into the templates
        # (the templates themselves are modified in place).
        sed -i "s/epoch:.*/epoch: ${epochs}/" "$config_file"
        sed -i "s/--job-name=.*/--job-name=cloudmask-gpu-greene-epoch-${epochs}/" "$slurm_script"
        sed -i "s/--time=.*/--time=${walltime}/" "$slurm_script"

        # Snapshot the templates for this (epochs, repeat) combination.
        cp "$config_file" "$run_config"
        cp "$slurm_script" "$run_slurm"

        # Give every run its own log files and record the repeat index.
        sed -i "s|log_file:.*|log_file: ./cloudmask_${epochs}_epochs_${i}.log|" "$run_config"
        sed -i "s|mlperf_logfile:.*|mlperf_logfile: ./mlperf_cloudmask_${epochs}_epochs_${i}.log|" "$run_config"
        sed -i "s/repeat:.*/repeat: \"${i}\"/" "$run_config"

        # Point the copied Slurm script at the copied config. The pattern
        # ends in a literal "yaml" (previously "yaml*", which made the
        # final "l" optional and repeatable).
        sed -i "s|--config config_simple\.yaml|--config ${run_config}|g" "$run_slurm"

        sbatch "$run_slurm"
    done
done

92 changes: 92 additions & 0 deletions benchmarks/cloudmask/target/greene_v0.5/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Recipes run under bash (one recipe below uses the bash builtin "source").
SHELL := /bin/bash

# AWS CLI prefix: unsigned requests against the endpoint given below.
AWS_S3 := aws s3 --no-sign-request --endpoint-url https://s3.echo.stfc.ac.uk

# Base name of the Singularity image file ($(NAME).sif).
NAME := cloudmask

# Python virtual environment activated before the job description is generated.
VENV := /scratch/$(USER)/ENV3

# Targets that name actions rather than files. Declaring them phony keeps
# a file or directory of the same name (for example ./data or ./project)
# from making the target look up to date.
# "singularity" and "singularity.json" are kept from the original list;
# no rule in this file defines them.
.PHONY: all requirements yaml data project generate run submit localscratch \
        kill stop inspect watch status clean image push shell \
        singularity singularity.json

# Default goal. The former "shm" prerequisite was dropped: no rule in this
# Makefile builds it, so "make all" stopped with "No rule to make target".
# The remaining prerequisites keep their original order.
all: project localscratch generate requirements data

# Install the Python packages listed in the greene experiment's
# requirements file.
# PROJECT_DIR is not set in this Makefile; it has to come from the
# environment or the make command line. Without it the path would
# collapse to /experiments/greene/requirements.txt, so stop early.
requirements:
	$(if $(strip $(PROJECT_DIR)),,$(error PROJECT_DIR is not set))
	pip install -r "$(PROJECT_DIR)/experiments/greene/requirements.txt"

# Replace every USERTOREPLACE placeholder in the greene config.yaml with
# the current user name. The file is edited in place.
# PROJECT_DIR is not set in this Makefile; it has to come from the
# environment or the make command line, so fail clearly when it is absent.
yaml:
	$(if $(strip $(PROJECT_DIR)),,$(error PROJECT_DIR is not set))
	sed -i 's/USERTOREPLACE/$(USER)/g' "$(PROJECT_DIR)/experiments/greene/config.yaml"


# Download the two cloud_slstr_ds1 data sets (one-day and ssts) from S3.
# PROJECT_DIR and PROJECT_DATA are not set in this Makefile; they have to
# come from the environment or the make command line. With either one
# empty the recipe would create /ssts and /one-day or download into the
# wrong directory, so stop early instead.
# NOTE(review): directories are created under $(PROJECT_DATA) but the
# downloads go to $(PROJECT_DIR)/data -- presumably the same place; confirm.
data:
	$(if $(strip $(PROJECT_DIR)),,$(error PROJECT_DIR is not set))
	$(if $(strip $(PROJECT_DATA)),,$(error PROJECT_DATA is not set))
	mkdir -p "$(PROJECT_DATA)/ssts" "$(PROJECT_DATA)/one-day"
	echo -n "Downloading first portion of data..."
	# "&&" (not ";") so nothing is downloaded when the cd fails.
	cd "$(PROJECT_DIR)" && $(AWS_S3) sync s3://sciml-datasets/es/cloud_slstr_ds1/one-day ./data/one-day --cli-read-timeout 0
	echo -n "Downloading second portion of data..."
	cd "$(PROJECT_DIR)" && $(AWS_S3) sync s3://sciml-datasets/es/cloud_slstr_ds1/ssts ./data/ssts --cli-read-timeout 0

# Convenience names for the generated experiment files.

# "project" experiment: its JSON description, then the submit script.
project: project.json
project: generate

# Submit script for the "project" experiment.
generate: jobs-project.sh

# "localscratch" experiment: only the JSON description is requested.
localscratch: localscratch.json

# Run the submit script with sh. The leading "-" lets make continue even
# when the script exits non-zero. This target has no prerequisites, so
# jobs-project.sh must already exist (see "generate").
submit:
	-sh jobs-project.sh

# "run" is another name for "submit".
run: submit

# Turn an experiment description (<name>.json) into a submit script
# (jobs-<name>.sh) by capturing the output of "cms sbatch generate submit".
# The output goes to a temporary file first and is renamed only on
# success, so a failed cms run cannot leave a truncated script that make
# would then consider up to date.
jobs-%.sh: %.json
	cms sbatch generate submit --name=$< > $@.tmp || { $(RM) $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Generate the experiment named by the stem (project.json -> "project")
# from config.in.yaml. The virtual environment in $(VENV) is sourced
# first; both commands share one shell because of the line continuation.
# $* is the pattern stem, used as the experiment name and as the output
# directory. The recipe itself never writes $@ -- presumably cms creates
# it; confirm against the cms sbatch documentation.
%.json: config.in.yaml
	source $(VENV)/bin/activate ; \
	cms sbatch generate --source=slurm.in.sh --config=$< --name=$* \
	    --noos --os=USER \
	    --output_dir=./$* --source_dir=. \
	    --copycode=slstr_cloud.py --verbose

# "kill" is another name for "stop".
kill: stop

# Cancel every job squeue lists for the current user.
# awk skips the header line and prints the first column (the job id).
# The command substitution is left unquoted on purpose: the shell splits
# it into one word per job id, and with no jobs the loop body never runs
# (previously scancel was then called once with no arguments).
stop:
	for i in $$(squeue --user "$$USER" | awk 'NR>1{print $$1}'); do scancel "$$i"; done

# Open config.yaml and job.slurm of the first entry listed under project/
# in emacs.
# Everything runs in a single shell: the directory name is held in the
# shell variable D ($$D), and emacs is started directly by the recipe so
# it keeps the terminal. The earlier form ran emacs through $(shell ...),
# which captures its output, and used $(ls -1), which make expands as an
# undefined make variable rather than running ls.
inspect:
	@D="$$(ls project/ | head -n 1)"; \
	echo "$$D"; \
	emacs "project/$$D/config.yaml" "project/$$D/job.slurm"

# "watch" is another name for "status".
watch: status

# Keep refreshing the current user's queue listing via watch(1).
# The whole squeue command is handed to watch as one string; the double
# quotes inside it keep the format specification together as a single
# argument when the command is run.
status:
	watch 'squeue --format="%.18i %.9P %.50j %.8u %.8T %.10M %.9l %.6D %R" --me'


# Remove generated experiment output and editor/Python leftovers.
# Every line is silent (@) and its exit status is ignored (-).
clean:
	@-$(RM) -r localscratch localscratch.json jobs-localscratch.sh \
	           project project.json jobs-project.sh
	@-$(RM) greene.slurm
	@-$(RM) -r '__pycache__' *~



#
# SINGULARITY IMAGE
#

# Build the Singularity image in image-singularity/ and move the result
# into this directory.
# $(MAKE) -C replaces "cd image-singularity; make image": if the cd
# failed, the old form ran "make image" here again and recursed into this
# same target. $(MAKE) also passes flags such as -n and -j to the sub-make.
image:
	$(MAKE) -C image-singularity image
	mv image-singularity/$(NAME).sif .

# Push local commits (errors ignored), then update the checkout on the
# remote host "rivanna" over ssh.
# The remote command previously read "cd <dir> ssh-add; git pull": with
# no separator after the directory, ssh-add became a second argument to
# cd. Now the pull only happens after a successful cd, and ssh-add stays
# best-effort.
# NOTE(review): the host name and the /scratch/thf2bn path are hard-coded
# to one user's checkout -- confirm they are still wanted here.
push:
	-git push
	ssh -tt rivanna "cd /scratch/thf2bn/mlcommons/ && { ssh-add; git pull; }"

# Start an interactive shell inside the image $(NAME).sif; --nv is passed
# to singularity as in the original recipe.
shell:
	singularity shell --nv $(NAME).sif
Loading

0 comments on commit 8bc75e5

Please sign in to comment.