From 4a9920d4041d4409d6d31359ac936f958c6dd12d Mon Sep 17 00:00:00 2001
From: FindHao
Date: Thu, 10 Oct 2024 11:48:16 -0700
Subject: [PATCH] keep name same with torchbench

---
 torchbenchmark/util/triton_op.py | 2 +-
 userbenchmark/triton/run.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchbenchmark/util/triton_op.py b/torchbenchmark/util/triton_op.py
index 1f6ee771e..f5db8e773 100644
--- a/torchbenchmark/util/triton_op.py
+++ b/torchbenchmark/util/triton_op.py
@@ -850,7 +850,7 @@ def _init_extra_metrics() -> Dict[str, Any]:
             or "gpu_peak_mem" in self.required_metrics
         ):
             metrics.cpu_peak_mem, _device_id, metrics.gpu_peak_mem = (
-                self.get_peak_mem(fn, self.tb_args.metrics_memory_usage_backend)
+                self.get_peak_mem(fn, self.tb_args.metrics_gpu_backend)
             )
         if not baseline and "accuracy" in self.required_metrics:
             metrics.accuracy = (
diff --git a/userbenchmark/triton/run.py b/userbenchmark/triton/run.py
index 2f5f2eec9..e66cf854f 100644
--- a/userbenchmark/triton/run.py
+++ b/userbenchmark/triton/run.py
@@ -93,7 +93,7 @@ def get_parser(args=None):
         help="Metrics to collect, split with comma. E.g., --metrics latency,tflops,speedup.",
     )
     parser.add_argument(
-        "--metrics-memory-usage-backend",
+        "--metrics-gpu-backend",
         choices=["default", "torch"],
         default="default",
         help="Specify the backend [default, torch] to collect metrics. In all modes, the latency (execution time) is always collected using `time.time_ns()`. The CPU peak memory usage is collected by `psutil.Process()`. In default mode, the GPU peak memory usage is collected by the `nvml` library. In torch mode, the GPU peak memory usage is collected by `torch.cuda.max_memory_allocated()`.",
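
For reviewers, here is a minimal, hypothetical sketch of the two GPU
peak-memory backends that the --metrics-gpu-backend help text describes.
The function names, the `fn` callable, and the `device_id` parameter are
illustrative and not part of this patch; it assumes `torch` and `pynvml`
are installed. It is not the actual tritonbench implementation.

    import torch

    def gpu_peak_mem_torch(fn, device_id: int = 0) -> float:
        # "torch" mode: peak memory allocated by this process, via
        # torch.cuda.max_memory_allocated(), reported in GB.
        torch.cuda.reset_peak_memory_stats(device_id)
        fn()
        torch.cuda.synchronize(device_id)
        return torch.cuda.max_memory_allocated(device_id) / 1e9

    def gpu_peak_mem_nvml(fn, device_id: int = 0) -> float:
        # "default" mode: device-wide memory in use after fn() returns,
        # read through NVML (pynvml). Unlike torch mode, this also counts
        # memory held by other processes on the same GPU, and it samples
        # usage at one point in time rather than tracking a true peak.
        import pynvml  # assumption: the pynvml package is available
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        fn()
        torch.cuda.synchronize(device_id)
        used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
        pynvml.nvmlShutdown()
        return used / 1e9

The sketch illustrates why the two modes can report different numbers for
the same workload, which is useful context when comparing runs that used
different values of this flag.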