Add 30b IFT example yaml #388

Merged (4 commits) on Jun 29, 2023
scripts/train/yamls/finetune/mpt-30b-instruct.yaml (175 additions, 0 deletions)
@@ -0,0 +1,175 @@
tokenizer_name: mosaicml/mpt-30b
max_seq_len: 8192
global_seed: 17

# Run Name
run_name: # If left blank, will be read from env var $COMPOSER_RUN_NAME

# Model
model:
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: mosaicml/mpt-30b
  init_device: mixed
  config_overrides:
    max_seq_len: ${max_seq_len}
    attn_config:
      attn_impl: triton
      # Note: we still use packing, but turn this off for memory.
      # Initial tests suggest this does not hurt performance too much.
      attn_uses_sequence_id: false
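      # (when attn_uses_sequence_id is true, packed examples are masked from
      # attending to one another; leaving it false trades that isolation for
      # lower memory use)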

# Tokenizer
tokenizer:
  name: ${tokenizer_name}
  kwargs:
    model_max_length: ${max_seq_len}

# Dataloaders
train_loader:
  name: finetuning
  dataset:
    hf_name: mosaicml/instruct-v3
    split: train
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
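    # packing_ratio is the average number of raw examples concatenated into
    # each max_seq_len sequence; the best value depends on the length
    # distribution of the finetuning data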
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

eval_loader:
  name: finetuning
  dataset:
    hf_name: mosaicml/instruct-v3
    split: test
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

# Optimization
scheduler:
  name: linear_decay_with_warmup # linear with no warmup is the HF default, which Dolly used
  t_warmup: 50ba
  alpha_f: 0

optimizer:
  # mimic InstructGPT
  name: decoupled_adamw
  lr: 9.65e-6
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-8
  weight_decay: 0

algorithms:
  gradient_clipping:
    clipping_type: norm
    clipping_threshold: 1.0

max_duration: 8ep
eval_interval: 1ep
eval_first: true
global_train_batch_size: 72

# System
seed: ${global_seed}
device_eval_batch_size: 4
device_train_microbatch_size: 1
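# Note: Composer derives gradient accumulation automatically: each device
# processes global_train_batch_size / num_devices samples per step, split
# into microbatches of device_train_microbatch_size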
precision: amp_bf16

# FSDP
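# (FULL_SHARD shards parameters, gradients, and optimizer state across
# devices; together with activation checkpointing this keeps the 30B model's
# per-device memory footprint manageable)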
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: PURE
  activation_checkpointing: true
  activation_checkpointing_reentrant: false
  activation_cpu_offload: false
  limit_all_gathers: true
  sync_module_states: true
  state_dict_type: local
  verbose: false

# Logging
progress_bar: false
log_to_console: true
console_log_interval: 20ba

callbacks:
  speed_monitor:
    window_size: 10
  runtime_estimator: {}
  lr_monitor: {}

# loggers:
# wandb: {}

# save_folder:
# save_interval: 3ep
# save_num_checkpoints_to_keep: 1

# Note: load_path must point to a checkpoint converted for use with the llm-foundry code
# load_path:
autoresume: false
load_weights_only: false
python_log_level: debug


icl_max_seq_len: 2048

# YOU MUST ADD YOUR OWN DATASET URIs
# this section can be removed if you do not want to track these metrics
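# (each dataset_uri is expected to point to an ICL eval dataset, e.g. a local
# or remote jsonl file; see the llm-foundry eval scripts for the exact format)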
icl_tasks:
-
  label: piqa
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
-
  label: hellaswag
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
-
  label: arc_easy
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
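
For reference, a config like this is normally launched through the repo's Composer entrypoint, e.g. composer scripts/train/train.py scripts/train/yamls/finetune/mpt-30b-instruct.yaml on a multi-GPU node (entrypoint and path assumed from the standard llm-foundry layout; adjust to your checkout).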