modify script

mosaicml · Aug 23, 2023 · fdfa2fa · fdfa2fa
1 parent e857cfc
commit fdfa2fa
Show file tree

Hide file tree

Showing 4 changed files with 160 additions and 86 deletions.
diff --git a/scripts/train/benchmarking/act_ckpt_optimize.sh b/scripts/train/benchmarking/act_ckpt_optimize.sh
@@ -1,19 +1,21 @@
 #!/bin/bash
 
-PROJECT="ackckpt"
+PROJECT="ackckptqt"
 TORCH_2_IMAGE="mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04"
 CLUSTER_80GB=r1z1
-GIT_COMMIT=v0.2.0
+GIT_BRANCH=run_initial
 
-for MB_SIZE in 1 2 4 8
+for MB_SIZE in 1 2 4 6 8
 do
-    for GATH_LMT in true false
-    do
-        for CPU_OFFLOAD in true false
-        do
-            python submit_benchmarks.py --project $PROJECT -m 3b.yaml -g 8 --microbatch_size  $MB_SIZE --accum  2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-            python submit_benchmarks.py --project $PROJECT -m 7b.yaml -g 8 --microbatch_size  $MB_SIZE --accum 2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-            python submit_benchmarks.py --project $PROJECT -m 13b.yaml -g 8 --microbatch_size  $MB_SIZE --accum  2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-        done
-    done
-done
+# #     for GATH_LMT in true false
+# #     do
+# #         for CPU_OFFLOAD in true false
+# #         do
+    python submit_benchmarks.py --project $PROJECT -m 3b.yaml -g 8 --microbatch_size $MB_SIZE --accum 2 --image $TORCH_2_IMAGE --git_branch $GIT_BRANCH --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+    python submit_benchmarks.py --project $PROJECT -m 7b.yaml -g 8 --microbatch_size $MB_SIZE --accum 2 --image $TORCH_2_IMAGE --git_branch $GIT_BRANCH --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+    python submit_benchmarks.py --project $PROJECT -m 13b.yaml -g 8 --microbatch_size $MB_SIZE --accum 2 --image $TORCH_2_IMAGE --git_branch $GIT_BRANCH --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# #         done
+# #     done
+done
+
+# python submit_benchmarks.py --project $PROJECT -m 3b.yaml -g 8 --microbatch_size 12 --accum  2 --image $TORCH_2_IMAGE --git_branch $GIT_BRANCH --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
diff --git a/scripts/train/benchmarking/submit_benchmarks.py b/scripts/train/benchmarking/submit_benchmarks.py
@@ -187,7 +187,7 @@ def parse_args():
     parser.add_argument('--torch_compile_fullgraph', type=str_to_bool, default=None)
     parser.add_argument('--torch_compile_dynamic', type=str_to_bool, default=None)
     parser.add_argument('--torch_compile_mode', type=str, default=None)
-
+    parser.add_argument('--torch_compile', type=str_to_bool, default=False)
     parser.add_argument('--RUN',
                         type=str_to_bool,
                         nargs='?',
@@ -283,7 +283,8 @@ def mod_parameters(parameters: Dict[str, Any],
                    pad_vocab_multiple: Optional[int] = None,
                    torch_compile_fullgraph: Optional[bool] = None,
                    torch_compile_dynamic: Optional[bool] = None,
-                   torch_compile_mode: Optional[str] = None
+                   torch_compile_mode: Optional[str] = None,
+                   torch_compile: bool = False
                    ):
     if run_name:
         parameters['run_name'] = run_name
@@ -349,16 +350,14 @@ def mod_parameters(parameters: Dict[str, Any],
         parameters['fsdp_config']['backward_prefetch'] = fsdp_config_backward_prefetch
     if activation_cpu_offload is not None:
         parameters['fsdp_config']['activation_cpu_offload'] = activation_cpu_offload
-    parameters['fsdp_config']['verbose'] = True
-
-
-    parameters['compile_config'] = {}
-    if torch_compile_fullgraph is not None:
-        parameters['compile_config']['fullgraph'] = torch_compile_fullgraph
-    if torch_compile_dynamic is not None:
-       parameters['compile_config']['dynamic'] = torch_compile_dynamic
-    if torch_compile_mode is not None:
-        parameters['compile_config']['mode'] = torch_compile_mode
+    # parameters['fsdp_config']['verbose'] = True
+    parameters['compile_config'] = {} if torch_compile else None
+    # if torch_compile_fullgraph is not None:
+    #     parameters['compile_config']['fullgraph'] = torch_compile_fullgraph
+    # if torch_compile_dynamic is not None:
+    #    parameters['compile_config']['dynamic'] = torch_compile_dynamic
+    # if torch_compile_mode is not None:
+    #     parameters['compile_config']['mode'] = torch_compile_mode
 
     if wandb:
         # add wandb
@@ -471,8 +470,11 @@ def run_config(config: Tuple[str, int, int, str, str, int, str],
         pad_vocab_multiple=args.pad_vocab_multiple,
         torch_compile_fullgraph = args.torch_compile_fullgraph,
         torch_compile_dynamic = args.torch_compile_dynamic,
-        torch_compile_mode = args.torch_compile_mode
+        torch_compile_mode = args.torch_compile_mode,
+        torch_compile = args.torch_compile
         )
+    if args.torch_compile and parameters['model']['attn_config']['attn_impl'] == 'triton':
+        raise ValueError("--torch_compile cannot be combined with attn_impl='triton'; choose a different attn_impl")
     if gpu_type == 'h100_80gb' and precision == 'fp8':
         parameters['model']['fc_type'] = 'te'
     # Create run config mcli sdk/api