-
Notifications
You must be signed in to change notification settings - Fork 283
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b6cd898
commit e19133d
Showing
4 changed files
with
186 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,42 @@ | ||
#!/bin/bash | ||
#SBATCH --nodes 1 | ||
#SBATCH --ntasks-per-node=8 | ||
#SBATCH --ntasks-per-node=6 | ||
#SBATCH --gpus-per-task=1 | ||
#SBATCH --account=efml | ||
#SBATCH --partition=gpu | ||
#SBATCH --time=48:00:00 | ||
#SBATCH --job-name=flamingo | ||
|
||
export PYTHONFAULTHANDLER=1 | ||
export CUDA_LAUNCH_BLOCKING=0 | ||
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"` | ||
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1) | ||
export MASTER_PORT=15000 | ||
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l` | ||
export HF_DATASETS_CACHE="/gscratch/efml/anasa2/.huggingface" TRANSFORMERS_CACHE="/gscratch/efml/anasa2/.huggingface" | ||
|
||
export PYTHONPATH="$PYTHONPATH:open_flamingo" | ||
srun --cpu_bind=v --accel-bind=gn python open_flamingo/open_flamingo/train/train.py \ | ||
--lm_path anas-awadalla/mpt-1b-redpajama-200b \ | ||
--tokenizer_path anas-awadalla/mpt-1b-redpajama-200b \ | ||
--cross_attn_every_n_layers 1 \ | ||
srun --cpu_bind=v --accel-bind=gn python | ||
|
||
|
||
|
||
deepspeed open_flamingo/open_flamingo/train/train.py \ | ||
--lm_path anas-awadalla/mpt-7b \ | ||
--tokenizer_path anas-awadalla/mpt-7b \ | ||
--cross_attn_every_n_layers 4 \ | ||
--dataset_resampled \ | ||
--batch_size_mmc4 32 \ | ||
--batch_size_laion 64 \ | ||
--batch_size_mmc4 16 \ | ||
--batch_size_laion 32 \ | ||
--deepspeed \ | ||
--train_num_samples_mmc4 125000\ | ||
--train_num_samples_laion 250000 \ | ||
--loss_multiplier_laion 0.2 \ | ||
--workers=4 \ | ||
--run_name OpenFlamingo-3B-vitl-mpt1b \ | ||
--run_name "deepspeed" \ | ||
--num_epochs 480 \ | ||
--warmup_steps 1875 \ | ||
--mmc4_textsim_threshold 0.24 \ | ||
--laion_shards "/path/to/shards/shard-{0000..0999}.tar" \ | ||
--mmc4_shards "/path/to/shards/shard-{0000..0999}.tar" \ | ||
--warmup_steps 0 \ | ||
--mmc4_textsim_threshold 0.0 \ | ||
--laion_shards "/mmfs1/gscratch/efml/anasa2/laion-samples/{000000..000001}.tar" \ | ||
--mmc4_shards "/mmfs1/gscratch/efml/anasa2/mmc4-samples/shard_{0..1}-000000000.tar" \ | ||
--gradient_checkpointing \ | ||
--report_to_wandb \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters