-
Notifications
You must be signed in to change notification settings - Fork 1
/
carsotrain_b.sh
executable file
·47 lines (47 loc) · 1.84 KB
/
carsotrain_b.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/bash -li
#SBATCH --job-name=train_carso_sc_b
#SBATCH --mail-type=FAIL,END
#SBATCH --partition=DGX
#SBATCH --time=0-04:00:00
#SBATCH --nodes=1 # Nodes
#SBATCH --ntasks-per-node=4 # GPUs per node
#SBATCH --cpus-per-task=24 # Cores per node / GPUs per node
#SBATCH --mem=256G # 4 * Cores per node
#SBATCH --gres=gpu:4 # GPUs per node
################################################################################
# Slurm batch script for the job named "train_carso_sc_b"; submit it with sbatch.
# The directives above request one node on the DGX partition with 4 tasks,
# 24 CPUs per task, 4 GPUs and 256G of memory, for at most 4 hours, and ask for
# mail when the job ends or fails. Further down, the script exports the settings
# for the distributed run and launches CARSO/src/train_b.py through srun.
#
# NOTE(review): the shebang starts bash with -l and -i (login + interactive),
# presumably so that the user's shell startup files are read, standing in for
# the commented-out "source" line below -- confirm before changing either.
#
# NOTE(review): the reason for this short pause is not evident from the script.
sleep 3
#
# Disabled: explicit sourcing of the user's bashrc.
#source $HOME/.bashrc
#
# Locations of the code checkout and of the Python interpreter used for the run.
export CODEHOME="$HOME/Downloads/"
export MYPYTHON="$HOME/micromamba/envs/nightorch/bin/python"
#
# job_master_port JOB_ID
#   Print 10000 plus the last four decimal digits of JOB_ID (a value in
#   10000-19999), so that different jobs usually pick different ports.
#   Uses shell arithmetic instead of expr/tail; the explicit "10#" base keeps a
#   zero-padded id from being read as octal.
#   Returns 1 and prints a diagnostic on stderr when JOB_ID is empty or is not
#   a decimal number; nothing is written to stdout in that case.
job_master_port() {
  local job_id=${1-}
  if [[ ! "$job_id" =~ ^[0-9]+$ ]]; then
    printf 'job_master_port: job id is not a decimal number: "%s"\n' "$job_id" >&2
    return 1
  fi
  printf '%s\n' "$(( 10000 + 10#$job_id % 10000 ))"
}
#
# Settings for the distributed run, derived from the Slurm job environment.
# Values are assigned first and exported afterwards, so the status of each
# command substitution is not hidden behind the (always successful) export.
MASTER_PORT=$(job_master_port "${SLURM_JOBID-}")
# Total number of tasks: nodes * tasks per node (0 when either is missing).
WORLD_SIZE=$(( ${SLURM_NNODES:-0} * ${SLURM_NTASKS_PER_NODE:-0} ))
# First host of the job's node list.
MASTER_ADDR=$(scontrol show hostnames "${SLURM_JOB_NODELIST-}" | head -n 1)
# One OpenMP thread per CPU assigned to each task.
OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK-}
export MASTER_PORT WORLD_SIZE MASTER_ADDR OMP_NUM_THREADS
#
# The script stays best-effort and does not abort here, but an incomplete Slurm
# environment is reported instead of passing silently.
if [[ -z "$MASTER_PORT" || -z "$MASTER_ADDR" || "$WORLD_SIZE" -eq 0 ]]; then
  printf 'WARNING: incomplete Slurm environment (MASTER_ADDR="%s" MASTER_PORT="%s" WORLD_SIZE="%s")\n' \
    "$MASTER_ADDR" "$MASTER_PORT" "$WORLD_SIZE" >&2
fi
#
# print_run_config
#   Write the settings of this run to stdout: the current host name followed by
#   WORLD_SIZE, OMP_NUM_THREADS, MASTER_ADDR and MASTER_PORT as currently set,
#   with a line holding a single space before and after.
#   Every value is passed to printf as a quoted argument, so it is printed
#   verbatim (no word splitting or glob expansion of its contents).
print_run_config() {
  printf ' \n'
  printf 'hostname=%s\n' "$(hostname)"
  printf 'WORLD_SIZE=%s\n' "${WORLD_SIZE-}"
  printf 'OMP_NUM_THREADS=%s\n' "${OMP_NUM_THREADS-}"
  printf 'MASTER_ADDR=%s\n' "${MASTER_ADDR-}"
  printf 'MASTER_PORT=%s\n' "${MASTER_PORT-}"
  printf ' \n'
}
print_run_config
#
################################################################################
# Run from the source tree; stop if it is missing instead of launching the
# training from whatever directory the job happened to start in.
cd "$CODEHOME/CARSO/src/" || {
  printf 'ERROR: cannot change directory to %s\n' "$CODEHOME/CARSO/src/" >&2
  exit 1
}
#
# time_banner LABEL
#   Print LABEL followed by the current date and time (YYYY_MM_DD-HH_MM_SS),
#   framed by blank-looking lines and horizontal rules.
time_banner() {
  local rule="-----------------------------------------------------------------------------------------------------------------"
  echo "$rule"
  echo " "
  echo "$1 $(date +'%Y_%m_%d-%H_%M_%S')"
  echo " "
  echo "$rule"
}
#
# Since Slurm 22.05 srun no longer inherits --cpus-per-task from sbatch, so the
# value is passed on explicitly when the job defines it; otherwise the tasks
# may get fewer CPUs than OMP_NUM_THREADS expects.
srun_opts=()
if [[ -n "${SLURM_CPUS_PER_TASK:-}" ]]; then
  srun_opts+=("--cpus-per-task=${SLURM_CPUS_PER_TASK}")
fi
#
time_banner "START TIME"
srun "${srun_opts[@]}" "$MYPYTHON" -O "$CODEHOME/CARSO/src/train_b.py" --dist --save --wandb --batchsize 2560
# Remember the result of the training step before the closing banner runs.
train_status=$?
time_banner "STOP TIME"
# Finish with srun's status: the batch script's exit code is what Slurm records
# for the job, so a failed training is reported as failed (and triggers the
# FAIL mail) instead of being hidden by the final echo.
exit "$train_status"
#