diff --git a/scripts/train/benchmarking/act_ckpt_optimize.sh b/scripts/train/benchmarking/act_ckpt_optimize.sh
index b05f22f728..52ff209e8a 100755
--- a/scripts/train/benchmarking/act_ckpt_optimize.sh
+++ b/scripts/train/benchmarking/act_ckpt_optimize.sh
@@ -1,19 +1,21 @@
 #!/bin/bash
 
-PROJECT="ackckpt"
+PROJECT="ackckptqt"
 TORCH_2_IMAGE="mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04"
 CLUSTER_80GB=r1z1
-GIT_COMMIT=v0.2.0
+GIT_BRANCH=run_initial
 
-for MB_SIZE in 1 2 4 8
+for MB_SIZE in 1 2 4 6 8
 do
-    for GATH_LMT in true false
-    do
-        for CPU_OFFLOAD in true false
-        do
-            python submit_benchmarks.py --project $PROJECT -m 3b.yaml -g 8 --microbatch_size  $MB_SIZE --accum  2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-            python submit_benchmarks.py --project $PROJECT -m 7b.yaml -g 8 --microbatch_size  $MB_SIZE --accum 2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-            python submit_benchmarks.py --project $PROJECT -m 13b.yaml -g 8 --microbatch_size  $MB_SIZE --accum  2 --image $TORCH_2_IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
-        done
-    done
-done
\ No newline at end of file
+# The GATH_LMT / CPU_OFFLOAD sweeps (true/false each) that used to nest inside
+# this loop are disabled, so --fsdp_config_limit_all_gathers and
+# --activation_cpu_offload are not passed. Each microbatch size submits one run
+# per model config (3b, 7b, 13b) with FSDP activation checkpointing set to false.
+    python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size "$MB_SIZE" --accum 2 --image "$TORCH_2_IMAGE" --git_branch "$GIT_BRANCH" --gpu_type a100_80gb -t bf16 --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+    python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size "$MB_SIZE" --accum 2 --image "$TORCH_2_IMAGE" --git_branch "$GIT_BRANCH" --gpu_type a100_80gb -t bf16 --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+    python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size "$MB_SIZE" --accum 2 --image "$TORCH_2_IMAGE" --git_branch "$GIT_BRANCH" --gpu_type a100_80gb -t bf16 --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# All three runs request a100_80gb so the GPU type agrees with $CLUSTER_80GB.
+# To restore the disabled sweeps, re-add both nested loops together with their flags.
+done
+
+# python submit_benchmarks.py --project $PROJECT -m 3b.yaml -g 8 --microbatch_size 12 --accum  2 --image $TORCH_2_IMAGE --git_branch $GIT_BRANCH --gpu_type a100_80gb -t bf16 --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_limit_all_gathers $GATH_LMT --activation_cpu_offload $CPU_OFFLOAD --fsdp_config_activation_checkpointing false
\ No newline at end of file
diff --git a/scripts/train/benchmarking/submit_benchmarks.py b/scripts/train/benchmarking/submit_benchmarks.py
index 197aceab2e..6e1050cdf2 100644
--- a/scripts/train/benchmarking/submit_benchmarks.py
+++ b/scripts/train/benchmarking/submit_benchmarks.py
@@ -187,7 +187,7 @@ def parse_args():
     parser.add_argument('--torch_compile_fullgraph', type=str_to_bool, default=None)
     parser.add_argument('--torch_compile_dynamic', type=str_to_bool, default=None)
     parser.add_argument('--torch_compile_mode', type=str, default=None)
-
+    parser.add_argument('--torch_compile', type=str_to_bool, default=False)
     parser.add_argument('--RUN',
                         type=str_to_bool,
                         nargs='?',
@@ -283,7 +283,8 @@ def mod_parameters(parameters: Dict[str, Any],
                    pad_vocab_multiple: Optional[int] = None,
                    torch_compile_fullgraph: Optional[bool] = None,
                    torch_compile_dynamic: Optional[bool] = None,
-                   torch_compile_mode: Optional[str] = None
+                   torch_compile_mode: Optional[str] = None,
+                   torch_compile: bool = False
                    ):
     if run_name:
         parameters['run_name'] = run_name
@@ -349,16 +350,14 @@ def mod_parameters(parameters: Dict[str, Any],
         parameters['fsdp_config']['backward_prefetch'] = fsdp_config_backward_prefetch
     if activation_cpu_offload is not None:
         parameters['fsdp_config']['activation_cpu_offload'] = activation_cpu_offload
-    parameters['fsdp_config']['verbose'] = True
-
-
-    parameters['compile_config'] = {}
-    if torch_compile_fullgraph is not None:
-        parameters['compile_config']['fullgraph'] = torch_compile_fullgraph
-    if torch_compile_dynamic is not None:
-       parameters['compile_config']['dynamic'] = torch_compile_dynamic
-    if torch_compile_mode is not None:
-        parameters['compile_config']['mode'] = torch_compile_mode
+    # compile_config stays None unless torch_compile is set; when it is set,
+    # forward only the torch_compile_* options that were explicitly provided.
+    compile_opts = {'fullgraph': torch_compile_fullgraph,
+                    'dynamic': torch_compile_dynamic,
+                    'mode': torch_compile_mode}
+    parameters['compile_config'] = (
+        {k: v for k, v in compile_opts.items() if v is not None}
+        if torch_compile else None)
 
     if wandb:
         # add wandb
@@ -471,8 +470,11 @@ def run_config(config: Tuple[str, int, int, str, str, int, str],
         pad_vocab_multiple=args.pad_vocab_multiple,
         torch_compile_fullgraph = args.torch_compile_fullgraph,
         torch_compile_dynamic = args.torch_compile_dynamic,
-        torch_compile_mode = args.torch_compile_mode
+        torch_compile_mode = args.torch_compile_mode,
+        torch_compile = args.torch_compile
         )
+    if args.torch_compile and parameters['model']['attn_config']['attn_impl'] == 'triton':  # checked before the run config is created
+        raise ValueError("--torch_compile cannot be combined with attn_impl='triton'; pick a different attn_impl")
     if gpu_type == 'h100_80gb' and precision == 'fp8':
         parameters['model']['fc_type'] = 'te'
     # Create run config mcli sdk/api
diff --git a/scripts/train/benchmarking/sweep_fsdp.sh b/scripts/train/benchmarking/sweep_fsdp.sh
index 51fa21db7a..358855cf98 100644
--- a/scripts/train/benchmarking/sweep_fsdp.sh
+++ b/scripts/train/benchmarking/sweep_fsdp.sh
@@ -25,71 +25,71 @@ python submit_benchmarks.py --project $PROJECT -m   3b.yaml -g 8 --microbatch_si
 python submit_benchmarks.py --project $PROJECT -m   3b.yaml -g 8 --microbatch_size   12 --accum  8 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing true
 python submit_benchmarks.py --project $PROJECT -m   3b.yaml -g 8 --microbatch_size   16 --accum  8 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing true
 
-python submit_benchmarks.py --project $PROJECT -m   7b.yaml -g 8 --microbatch_size  16 --accum  4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m   7b.yaml -g 8 --microbatch_size  12 --accum  4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/
+python submit_benchmarks.py --project $PROJECT -m   7b.yaml -g 8 --microbatch_size  16 --accum  4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m   7b.yaml -g 8 --microbatch_size  12 --accum  4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN
 
 # Replicate/understand any diffs using streaming data loader
-python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 13 13 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing true
-python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 12 12 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 13 13 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 12 12 --RUN --fsdp_config_activation_checkpointing true
 
 # 80GB Test microbatch size w/ no act_ckpt
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project $PROJECT -m  3b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_80gb --cluster $CLUSTER_80GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
 
 # # Test ack_ckpt differences
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 6 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false # PASSED
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/  --fsdp_config_activation_checkpointing false # PASSED
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false # PASSED
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-
-
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 8 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 3 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 6 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false # PASSED
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN  --fsdp_config_activation_checkpointing false # PASSED
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 1 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false # PASSED
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+
+
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 8 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 1 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  7b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 10 10 --RUN --fsdp_config_activation_checkpointing false
 
 # NOTE: Tried the commented ones last night, OOM'd
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 14 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 12 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 10 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 8 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false # PASSED
-# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 7 7 --RUN --data_remote oci://mosaicml-internal-dataset-c4/preconcat-gpt_neox/ --fsdp_config_activation_checkpointing false #PASSED
\ No newline at end of file
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 14 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 12 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 10 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 8 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 4 --accum 4 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 2 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 9 9 --RUN --fsdp_config_activation_checkpointing false # PASSED
+# python submit_benchmarks.py --project $PROJECT -m  13b.yaml -g 8 --microbatch_size 8 --accum 16 --image $IMAGE --git_commit $GIT_COMMIT --gpu_type a100_40gb --cluster $CLUSTER_40GB -s 7 7 --RUN --fsdp_config_activation_checkpointing false #PASSED
\ No newline at end of file
diff --git a/scripts/train/benchmarking/torch_benchmarks_80gb.sh b/scripts/train/benchmarking/torch_benchmarks_80gb.sh
new file mode 100755
index 0000000000..75f3f52aab
--- /dev/null
+++ b/scripts/train/benchmarking/torch_benchmarks_80gb.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# Submits Torch 1.13 and Torch 2 benchmark runs for several model sizes by
+# invoking submit_benchmarks.py once per configuration. Run from the directory
+# containing submit_benchmarks.py, since it is invoked by relative path.
+# Commented-out invocations are disabled configurations kept for reference.
+#
+# NOTE(review): -s presumably takes "<min_exp> <max_exp>" (sequence length as a
+# power of two) -- confirm against submit_benchmarks.py.
+
+PROJECT="torches80gb"
+TORCH_2_IMAGE="mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04"
+TORCH_113_IMAGE="mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04"
+CLUSTER_80GB=r1z1
+# Not referenced by any invocation in this script.
+CLUSTER_40GB=r8z3
+GIT_COMMIT=v0.2.0
+
+# 30b test Torch runs (Torch 1.13 group disabled, Torch 2 group active)
+# python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 1 --accum 21 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 12 13 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 3 --accum 21 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 6 --accum 21 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 10 10 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 12 --accum 21 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# NOTE(review): the first entry uses "-s 12 13" to mirror its Torch 1.13
+# counterpart above; it was previously "-s 13 12", which looks like a reversed
+# (and therefore possibly empty) range -- confirm the intended values.
+python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 1 --accum 21 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 12 13 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 3 --accum 21 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 6 --accum 21 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 10 10 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project "$PROJECT" -m 30b.yaml -g 8 --microbatch_size 12 --accum 21 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# 13b test Torch runs -- separate Torch 1.13 and Torch 2 groups (both disabled)
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 1 --accum 3 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 5 --accum 3 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 20 --accum 3 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 80 --accum 3 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 1 --accum 3 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 5 --accum 3 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 20 --accum 3 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 13b.yaml -g 8 --microbatch_size 80 --accum 3 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# 7b test Torch runs (Torch 1.13 group, then Torch 2 group; both disabled)
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 2 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 8 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 32 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 128 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 2 --accum 2 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 8 --accum 2 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 32 --accum 2 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing true
+# python submit_benchmarks.py --project "$PROJECT" -m 7b.yaml -g 8 --microbatch_size 128 --accum 2 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing true
+
+# 3b test Torch 1.13 runs
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 3 --accum 6 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 3 --accum 6 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 10 --accum 6 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 40 --accum 6 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+
+# 3b test Torch 2 runs
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 3 --accum 6 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing true
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 3 --accum 6 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 10 --accum 6 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 11 11 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 3b.yaml -g 8 --microbatch_size 40 --accum 6 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+
+# 1b test Torch runs (Torch 1.13 group, then Torch 2 group)
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 1 --accum 4 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 2 --accum 4 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 56 --accum 4 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 1 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 2 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 1b.yaml -g 8 --microbatch_size 56 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+
+# 350m test Torch runs, abbreviated sweep (Torch 1.13 group, then Torch 2 group)
+# NOTE(review): the two groups below use different microbatch sizes, --accum
+# values and -s values, so they are not like-for-like -- confirm intended.
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 2 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 4 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 13 13 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 6 --accum 2 --image "$TORCH_113_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false
+
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 1 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 15 15 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 2 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 14 14 --RUN --fsdp_config_activation_checkpointing false
+python submit_benchmarks.py --project "$PROJECT" -m 350m.yaml -g 8 --microbatch_size 56 --accum 4 --image "$TORCH_2_IMAGE" --git_commit "$GIT_COMMIT" --gpu_type a100_80gb --cluster "$CLUSTER_80GB" -s 9 9 --RUN --fsdp_config_activation_checkpointing false