Add Script to Calculate Summary Information for Benchmark Results #271

Merged: 28 commits, merged Nov 4, 2024

Changes from all commits

Commits (28):
91724e4
benchmark fix
kiddyjinjin Sep 26, 2024
13b5800
merge upstream to this
kiddyjinjin Sep 26, 2024
a4d985b
change three kinds of benchmark level setting to two kinds
kiddyjinjin Sep 27, 2024
84fa232
update the basic settings
kiddyjinjin Sep 27, 2024
4d0bc21
adjust the batch & shape info for all the operators
kiddyjinjin Sep 30, 2024
d347517
merge upstream
kiddyjinjin Sep 30, 2024
f155778
benchmark fix: Refactor benchmark structure design, add interfaces fo…
kiddyjinjin Oct 10, 2024
fc5841e
benchmark fix for special operations
kiddyjinjin Oct 11, 2024
03cf1de
amend
kiddyjinjin Oct 12, 2024
7eb9be4
merge perf shapes
kiddyjinjin Oct 16, 2024
0a07f8d
Merge remote-tracking branch 'upstream/master'
kiddyjinjin Oct 17, 2024
04cdf48
merge upstream
kiddyjinjin Oct 22, 2024
412c1f4
benchmark fix
kiddyjinjin Oct 22, 2024
7db32d5
specify DEFAULT_SHAPES_EXCLUDE_1D & DEFAULT_SHAPES_EXCLUDE_3D and DEF…
kiddyjinjin Oct 23, 2024
ea3076e
merge upstream/master
kiddyjinjin Oct 23, 2024
949ec70
fix pre-commit
kiddyjinjin Oct 23, 2024
371072c
update CONTRIBUTING.md & CONTRIBUTING_cn.md
kiddyjinjin Oct 23, 2024
182dc0a
for pre-commit
kiddyjinjin Oct 23, 2024
7c57c47
move shapes info to yaml file
kiddyjinjin Oct 29, 2024
f69b181
move shapes info to yaml file
kiddyjinjin Oct 29, 2024
8cb7204
fix record log bug
kiddyjinjin Oct 29, 2024
778fcfb
merge upstream
kiddyjinjin Oct 29, 2024
cfecc5e
pre-commit fix
kiddyjinjin Oct 30, 2024
4f63e88
fix json encode bug: when meeting custom object
kiddyjinjin Oct 30, 2024
cf3ae7e
merge upstream
kiddyjinjin Nov 1, 2024
e2c1936
add script to summarize the benchmark results
kiddyjinjin Nov 1, 2024
9da92ff
fix repeat_interleave benchmark bug
kiddyjinjin Nov 4, 2024
585e206
fix repeat_interleave benchmark bug
kiddyjinjin Nov 4, 2024
9 changes: 7 additions & 2 deletions benchmark/conftest.py
@@ -174,8 +174,13 @@ def pytest_configure(config):
 @pytest.fixture(scope="session", autouse=True)
 def setup_once(request):
     if request.config.getoption("--query"):
-        print("")
-        print("This is query mode; skipping all real benchmark functions.")
+        print("\nThis is query mode; all benchmark functions will be skipped.")
+    else:
+        note_info = (
+            "\n\nNote: The 'size' field below is for backward compatibility with previous versions of the benchmark. "
+            "\nThis field will be removed in a future release."
+        )
+        print(note_info)


 @pytest.fixture()
171 changes: 171 additions & 0 deletions benchmark/summary_for_plot.py
@@ -0,0 +1,171 @@
"""
Script for Generating Operation Benchmark Summary Results

This script processes benchmark log files to calculate the average speedup for each
operation, categorized by data type. The summary provides an organized view of performance
gains, making it easier to analyze benchmark results by each tested data type.

Usage:
Pre-Step:
Collect benchmark results by running a command similar to the following:

pytest test_blas_perf.py --level core --record log

**Note**: The command above is an example. It runs benchmark tests on a subset of files.
You may need to modify it based on the files or parameters you want to test. Be sure to
include the `--record log` option, as it is required to generate the benchmark log file.

The example command above will generate a log file named `result_test_blas_perf--level_core--record_log.log`
in the benchmark directory.

Step 1:
Run this script with the generated log file as an argument:

python summary_for_plot.py result_test_blas_perf--level_core--record_log.log

Options:
-h, --help Show this help message and exit.
log_file_path Path to the benchmark log file to be processed.
"""

import argparse
import json
from collections import defaultdict
from dataclasses import dataclass
from typing import List

from attri_util import BenchmarkMetrics, BenchmarkResult


@dataclass
class SummaryResult:
    op_name: str = ""
    float16_speedup: float = 0.0
    float32_speedup: float = 0.0
    bfloat16_speedup: float = 0.0
    int16_speedup: float = 0.0
    int32_speedup: float = 0.0
    bool_speedup: float = 0.0
    cfloat_speedup: float = 0.0

    def __str__(self) -> str:
        return (
            f"{self.op_name:<30} "
            f"{self.float16_speedup:<15.6f} "
            f"{self.float32_speedup:<15.6f} "
            f"{self.bfloat16_speedup:<15.6f} "
            f"{self.int16_speedup:<15.6f} "
            f"{self.int32_speedup:<15.6f} "
            f"{self.bool_speedup:<15.6f} "
            f"{self.cfloat_speedup:<15.6f}"
        )


def parse_log(log_file_path: str) -> List[BenchmarkResult]:
    with open(log_file_path, "r") as file:
        log_lines = [
            line
            for line in file.read().strip().split("\n")
            if line.startswith("[INFO]")
        ]

    benchmark_results = []
    for line in log_lines:
        if line.startswith("[INFO]"):
            json_str = line[len("[INFO] ") :]
            data = json.loads(json_str)
            benchmark_result = BenchmarkResult(
                op_name=data["op_name"],
                dtype=data["dtype"],
                mode=data["mode"],
                level=data["level"],
                result=[
                    BenchmarkMetrics(
                        legacy_shape=metric.get("legacy_shape"),
                        shape_detail=metric.get("shape_detail", []),
                        latency_base=metric.get("latency_base"),
                        latency=metric.get("latency"),
                        speedup=metric.get("speedup"),
                        accuracy=metric.get("accuracy"),
                        tflops=metric.get("tflops"),
                        utilization=metric.get("utilization"),
                    )
                    for metric in data["result"]
                ],
            )

            benchmark_results.append(benchmark_result)

    return benchmark_results


def calculate_avg_speedup(metrics):
    speedups = [metric.speedup for metric in metrics if metric.speedup is not None]
    return sum(speedups) / len(speedups) if speedups else 0.0


def summary_for_plot(benchmark_results):
    summary = defaultdict(SummaryResult)

    dtype_mapping = {
        "torch.float16": "float16_speedup",
        "torch.float32": "float32_speedup",
        "torch.bfloat16": "bfloat16_speedup",
        "torch.int16": "int16_speedup",
        "torch.int32": "int32_speedup",
        "torch.bool": "bool_speedup",
        "torch.cfloat": "cfloat_speedup",
    }

    for item in benchmark_results:
        dtype_suffix = ""
        if item.dtype in ["torch.float16", "torch.float32", "torch.bfloat16"]:
            dtype_suffix = ""  # No suffix for float types
        else:
            dtype_suffix = (
                "_complex"
                if "cfloat" in item.dtype
                else "_int"
                if "int" in item.dtype
                else "_bool"
            )

        op_name = item.op_name + dtype_suffix
        avg_speedup = calculate_avg_speedup(item.result)
        cur_op_summary = summary[op_name]
        cur_op_summary.op_name = op_name
        setattr(
            summary[op_name],
            dtype_mapping.get(item.dtype, "float16_speedup"),
            avg_speedup,
        )

    header = (
        f"{'op_name':<30} "
        f"{'float16_speedup':<16} "
        f"{'float32_speedup':<16} "
        f"{'bfloat16_speedup':<16} "
        f"{'int16_speedup':<16} "
        f"{'int32_speedup':<16} "
        f"{'bool_speedup':<16} "
        f"{'cfloat_speedup':<16}"
    )

    print(header)
    for result in summary.values():
        print(result)

    return summary


def main(log_file_path):
    result = parse_log(log_file_path)
    summary_for_plot(result)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Parse benchmark log file.")
    parser.add_argument("log_file_path", type=str, help="Path to the log file.")
    args = parser.parse_args()

    main(args.log_file_path)
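
For reference, here is a minimal sketch of the input this script consumes. The JSON values below are illustrative placeholders inferred from `parse_log` and the `BenchmarkMetrics` fields above, not real benchmark output:

```python
import json

# Hypothetical example of one record in the benchmark log file.
# parse_log() keeps only lines that start with "[INFO]" and parses the
# remainder of each kept line as JSON.
example_log_line = (
    '[INFO] {"op_name": "mm", "dtype": "torch.float16", "mode": "cuda", '
    '"level": "core", "result": [{"legacy_shape": null, '
    '"shape_detail": [[1024, 1024], [1024, 1024]], "latency_base": 0.25, '
    '"latency": 0.2, "speedup": 1.25, "accuracy": null, "tflops": null, '
    '"utilization": null}]}'
)

data = json.loads(example_log_line[len("[INFO] "):])
assert data["op_name"] == "mm" and data["result"][0]["speedup"] == 1.25
```

Note that operations benchmarked under non-float dtypes are reported as separate rows: `summary_for_plot` appends `_int`, `_bool`, or `_complex` to the op name, so an op run with `torch.int32`, for example, gets its own row with the average speedup placed in the `int32_speedup` column.
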
29 changes: 0 additions & 29 deletions benchmark/test_special_perf.py
@@ -244,32 +244,3 @@ def upsample_nearest2d_input_fn(shape, dtype, device):
         dtypes=FLOAT_DTYPES,
     )
     bench.run()
-
-
-def test_perf_repeat_interleave_self_tensor():
-    def repeat_interleave_self_tensor_arg(dtype, batch, size):
-        inp = torch.randn([batch, size], dtype=dtype, device="cuda")
-        repeats = torch.randint(
-            low=0,
-            high=0x2F,
-            size=[
-                batch,
-            ],
-            device="cuda",
-        )
-        dim = 0
-        return (
-            inp,
-            repeats,
-            dim,
-        )
-
-    bench = Benchmark(
-        op_name="repeat_interleave_self_tensor",
-        torch_op=torch.repeat_interleave,
-        arg_func=repeat_interleave_self_tensor_arg,
-        dtypes=FLOAT_DTYPES,
-        batch=POINTWISE_BATCH,
-        sizes=SIZES,
-    )
-    bench.run()
16 changes: 14 additions & 2 deletions benchmark/test_tensor_concat_perf.py
@@ -124,8 +124,20 @@ def repeat_input_fn(shape, cur_dtype, device):

 def repeat_interleave_self_input_fn(shape, dtype, device):
     inp = generate_tensor_input(shape, dtype, device)
-    repeats = 3
-    yield inp, repeats
+    repeats = torch.randint(
+        low=0,
+        high=0x2F,
+        size=[
+            shape[0],
+        ],
+        device=device,
+    )
+    dim = 0
+    # repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
+    yield inp, repeats, dim
+    if Config.bench_level == BenchLevel.COMPREHENSIVE:
+        # repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
+        yield inp, 3


 @pytest.mark.parametrize(
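
For readers unfamiliar with the two `repeat_interleave` overloads exercised by the new input function above, here is a minimal sketch with small hand-picked values (the benchmark itself uses random tensors and the configured shapes):

```python
import torch

inp = torch.tensor([[1, 2], [3, 4]])

# repeat_interleave.self_Tensor: per-row repeat counts along dim=0
repeats = torch.tensor([2, 1])
print(torch.repeat_interleave(inp, repeats, dim=0))
# tensor([[1, 2],
#         [1, 2],
#         [3, 4]])

# repeat_interleave.self_int: a single scalar repeat count; with dim=None
# the input is flattened before repeating
print(torch.repeat_interleave(inp, 3))
# tensor([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])
```

The tensor-repeats overload is always benchmarked; the scalar overload is only added at the COMPREHENSIVE bench level, as the diff shows.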