Add 30b IFT example yaml #388

Merged (4 commits) on Jun 29, 2023
scripts/train/yamls/finetune/mpt-30b-instruct.yaml (175 additions, 0 deletions)
@@ -0,0 +1,175 @@
tokenizer_name: mosaicml/mpt-30b
max_seq_len: 8192
global_seed: 17

# Run Name
run_name: # If left blank, will be read from env var $COMPOSER_RUN_NAME

# Model
model:
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: mosaicml/mpt-30b
  init_device: mixed
  config_overrides:
    max_seq_len: ${max_seq_len}
    attn_config:
      attn_impl: triton
      # Note: we still use packing, but turn this off for memory.
      # Initial tests suggest this does not hurt performance too much.
      attn_uses_sequence_id: false
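      # (when attn_uses_sequence_id is true, packed examples are masked from
      # attending to one another; leaving it false trades that isolation for
      # lower memory use)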

# Tokenizer
tokenizer:
  name: ${tokenizer_name}
  kwargs:
    model_max_length: ${max_seq_len}

# Dataloaders
train_loader:
  name: finetuning
  dataset:
    hf_name: mosaicml/instruct-v3
    split: train
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
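    # packing_ratio is the average number of raw examples concatenated into
    # each max_seq_len sequence; the best value depends on the length
    # distribution of the finetuning data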
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

eval_loader:
  name: finetuning
  dataset:
    hf_name: mosaicml/instruct-v3
    split: test
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

# Optimization
scheduler:
  name: linear_decay_with_warmup # linear with no warmup is the HF default, which Dolly used
  t_warmup: 50ba
  alpha_f: 0

optimizer:
  # mimic InstructGPT
  name: decoupled_adamw
  lr: 9.65e-6
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-8
  weight_decay: 0

algorithms:
  gradient_clipping:
    clipping_type: norm
    clipping_threshold: 1.0

max_duration: 8ep
eval_interval: 1ep
eval_first: true
global_train_batch_size: 72

# System
seed: ${global_seed}
device_eval_batch_size: 4
device_train_microbatch_size: 1
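# Note: Composer derives gradient accumulation automatically: each device
# processes global_train_batch_size / num_devices samples per step, split
# into microbatches of device_train_microbatch_size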
precision: amp_bf16

# FSDP
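# (FULL_SHARD shards parameters, gradients, and optimizer state across
# devices; together with activation checkpointing this keeps the 30B model's
# per-device memory footprint manageable)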
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: PURE
  activation_checkpointing: true
  activation_checkpointing_reentrant: false
  activation_cpu_offload: false
  limit_all_gathers: true
  sync_module_states: true
  state_dict_type: local
  verbose: false

# Logging
progress_bar: false
log_to_console: true
console_log_interval: 20ba

callbacks:
  speed_monitor:
    window_size: 10
  runtime_estimator: {}
  lr_monitor: {}

# loggers:
# wandb: {}

# save_folder:
# save_interval: 3ep
# save_num_checkpoints_to_keep: 1

# Note: load_path must point to a checkpoint converted for use with the llm-foundry code
# load_path:
autoresume: false
load_weights_only: false
python_log_level: debug


icl_max_seq_len: 2048

# YOU MUST ADD YOUR OWN DATASET URIs
# this section can be removed if you do not want to track these metrics
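# (each dataset_uri is expected to point to an ICL eval dataset, e.g. a local
# or remote jsonl file; see the llm-foundry eval scripts for the exact format)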
icl_tasks:
-
  label: piqa
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
-
  label: hellaswag
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
-
  label: arc_easy
  dataset_uri: # ADD YOUR OWN DATASET URI
  num_fewshot:
  - 0
  batch_size: 4
  max_seq_len: ${icl_max_seq_len}
  icl_task_type: multiple_choice
  metric_names:
  - InContextLearningMultipleChoiceAccuracy
  prompt_string: '' # this goes at the beginning of each input
  example_delimiter: "\n" # this goes between fewshot examples
  continuation_delimiter: ' ' # this separates questions from answers
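
For reference, a config like this is normally launched through the repo's Composer entrypoint, e.g. composer scripts/train/train.py scripts/train/yamls/finetune/mpt-30b-instruct.yaml on a multi-GPU node (entrypoint and path assumed from the standard llm-foundry layout; adjust to your checkout).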