Skip to content

Commit

Permalink
Merge pull request #54 from lambdaclass/bench-analysis
Browse files Browse the repository at this point in the history
Improves logging and plots benchmark information
  • Loading branch information
JulianGCalderon authored Oct 29, 2024
2 parents 4b32b84 + 18df2ea commit 38d05a8
Show file tree
Hide file tree
Showing 15 changed files with 575 additions and 23 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ blockifier = { git = "https://github.com/lambdaclass/sequencer", rev = "1b1b95ca
starknet_gateway = { git = "https://github.com/lambdaclass/sequencer", rev = "1b1b95cae7ae07b9bc778443ca75ee18008a6bc8"}

[patch.'https://github.com/lambdaclass/cairo_native']
cairo-native = { git = "https://github.com/lambdaclass//cairo_native.git", rev = "355c250f37cf0977ef2776b1aae2cb2e87c9da3d" }
cairo-native = { git = "https://github.com/lambdaclass//cairo_native.git", rev = "ab478323d6aee5e0424712bbde98de443b8cc72f" }
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,25 @@ To compare the outputs, you can use the following scripts. Some of them required
```bash
> ./scripts/delta_state_dumps.sh
```

### Plotting

In the `plotting` directory, you can find Python scripts that plot relevant information. Before using them, you must first execute the replay with the `structured_logging` feature and redirect the output to a file. Do this for both Native execution and VM execution.

Make sure to erase the `compiled_programs` directory, then run:

```bash
cargo run --features benchmark,structured_logging bench-block-range 724000 724000 mainnet 1 | tee native-logs
cargo run --features benchmark,structured_logging,only_cairo_vm bench-block-range 724000 724000 mainnet 1 | tee vm-logs
```

Once you have done this, you can use the plotting scripts:

- `python ./plotting/plot_compilation_memory.py native-logs`: Size of the compiled native libraries, by contract class.
- `python ./plotting/plot_compilation_memory_corr.py native-logs vm-logs`: Size of the compiled native libraries, by the associated Casm contract size.
- `python ./plotting/plot_compilation_memory_trend.py native-logs vm-logs`: Size of the compiled native and casm contracts, by the sierra contract size.
- `python ./plotting/plot_compilation_time.py native-logs`: Native compilation time, by contract class.
- `python ./plotting/plot_compilation_time_trend.py native-logs vm-logs`: Native and Casm compilation time, by the sierra contract size.
- `python ./plotting/plot_execution_time.py native-logs vm-logs`: Plots the execution time of Native vs VM, by contract class.
- `python ./plotting/plot_compilation_time_finer.py native-logs`: Native compilation time, with fine-grained stage separation, by contract class.

47 changes: 47 additions & 0 deletions plotting/plot_compilation_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from argparse import ArgumentParser

# Command-line interface: one positional argument, the path to the native
# logs file. The string given to ArgumentParser is used as the program name
# in the usage/help output.
argument_parser = ArgumentParser('Stress Test Plotter')
argument_parser.add_argument("native_logs_path")
arguments = argument_parser.parse_args()

# NOTE(review): the plotting libraries are imported only after the arguments
# have been parsed -- presumably so usage errors are reported without first
# loading them; confirm before moving these imports to the top of the file.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the log file as line-delimited JSON into a Series (one entry per line).
dataset = pd.read_json(arguments.native_logs_path, lines=True, typ="series")

def canonicalize_compilation_time(event):
    """Turn a "contract compilation finished" log event into a plot record.

    Args:
        event: One parsed log entry (a dict). It is expected to carry a
            "fields" mapping with a "message" and, for compilation events,
            a "size" value.

    Returns:
        A dict with the "class hash" of the enclosing "contract compilation"
        span and the "size" divided by 1024 * 1024 (MiB, assuming the logged
        size is in bytes), or None when the event is not a compilation record
        or no matching span is found.
    """
    # Events without "fields" or without a "message" cannot be compilation
    # records; skip them instead of letting a KeyError abort the whole run.
    fields = event.get("fields") or {}
    message = fields.get("message") or ""
    if "contract compilation finished" not in message:
        return None

    compilation_span = find_span(event, "contract compilation")
    if compilation_span is None:
        return None

    return {
        "class hash": compilation_span["class_hash"],
        "size": fields["size"] / (1024 * 1024),
    }

def find_span(event, name):
    """Return the first span of *event* whose name contains *name*.

    Args:
        event: One parsed log entry (a dict), optionally holding a "spans"
            list of dicts that each have a "name".
        name: Substring to look for in each span's name.

    Returns:
        The first matching span dict, or None when nothing matches or the
        event has no "spans" entry.
    """
    # A missing (or null) "spans" entry is treated as an empty list rather
    # than raising KeyError, so callers get the usual None result.
    for span in event.get("spans") or ():
        if name in span["name"]:
            return span
    return None

def format_hash(class_hash):
    """Abbreviate a class hash as "0x", its first six characters, then "..."."""
    leading_chars = class_hash[:6]
    return "0x{}...".format(leading_chars)


# Keep only the compilation records, then expand each record dict into columns.
compilation_records = dataset.apply(canonicalize_compilation_time).dropna()
dataset = compilation_records.apply(pd.Series)

figure, ax = plt.subplots()

sns.set_color_codes("bright")
# One bar per class hash; format_hash shortens the hashes used as labels.
sns.barplot(data=dataset, x="size", y="class hash", formatter=format_hash, ax=ax)  # type: ignore

ax.set(
    xlabel="Library Size (MiB)",
    ylabel="Class Hash",
    title="Library Size by Contract",
)

plt.show()
71 changes: 71 additions & 0 deletions plotting/plot_compilation_memory_corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from argparse import ArgumentParser

# Command-line interface: two positional arguments, the paths to the native
# and VM logs files. The string given to ArgumentParser is used as the
# program name in the usage/help output.
argument_parser = ArgumentParser('Stress Test Plotter')
argument_parser.add_argument("native_logs_path")
argument_parser.add_argument("vm_logs_path")
arguments = argument_parser.parse_args()

# NOTE(review): the plotting libraries are imported only after the arguments
# have been parsed -- presumably so usage errors are reported without first
# loading them; confirm before moving these imports to the top of the file.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read each log file as line-delimited JSON into a Series (one entry per line).
dataset_native = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
dataset_vm = pd.read_json(arguments.vm_logs_path, lines=True, typ="series")

def canonicalize_compilation_time(event):
    """Turn a "contract compilation finished" log event into a plot record.

    Args:
        event: One parsed log entry (a dict). It is expected to carry a
            "fields" mapping with a "message" and, for compilation events,
            a "size" value.

    Returns:
        A dict with the "class hash" of the enclosing "contract compilation"
        span and the "size" divided by 1024 (KiB, assuming the logged size is
        in bytes), or None when the event is not a compilation record or no
        matching span is found.
    """
    # Events without "fields" or without a "message" cannot be compilation
    # records; skip them instead of letting a KeyError abort the whole run.
    fields = event.get("fields") or {}
    message = fields.get("message") or ""
    if "contract compilation finished" not in message:
        return None

    compilation_span = find_span(event, "contract compilation")
    if compilation_span is None:
        return None

    return {
        "class hash": compilation_span["class_hash"],
        "size": fields["size"] / 1024,
    }

def find_span(event, name):
    """Return the first span of *event* whose name contains *name*.

    Args:
        event: One parsed log entry (a dict), optionally holding a "spans"
            list of dicts that each have a "name".
        name: Substring to look for in each span's name.

    Returns:
        The first matching span dict, or None when nothing matches or the
        event has no "spans" entry.
    """
    # A missing (or null) "spans" entry is treated as an empty list rather
    # than raising KeyError, so callers get the usual None result.
    for span in event.get("spans") or ():
        if name in span["name"]:
            return span
    return None

def format_hash(class_hash):
    """Abbreviate a class hash as "0x", its first six characters, then "..."."""
    leading_chars = class_hash[:6]
    return "0x{}...".format(leading_chars)


# Reduce each log to its compilation records, expand them into columns and
# index the result by class hash so the two runs can be joined.
dataset_native = (
    dataset_native.apply(canonicalize_compilation_time)
    .dropna()
    .apply(pd.Series)
    .set_index("class hash")
)
dataset_vm = (
    dataset_vm.apply(canonicalize_compilation_time)
    .dropna()
    .apply(pd.Series)
    .set_index("class hash")
)

# The suffixes turn the shared "size" column into "size_native" / "size_casm".
dataset = dataset_native.join(dataset_vm, lsuffix="_native", rsuffix="_casm")

figure, ax = plt.subplots()

sns.set_color_codes("bright")

# Fit one regression on each side of the 1000 native-size threshold.
native_size = dataset["size_native"]
for label, subset in (
    ("Native (<1000)", dataset[native_size < 1000]),
    ("Native (>=1000)", dataset[native_size >= 1000]),
):
    sns.regplot(x="size_native", y="size_casm", label=label, data=subset, ax=ax)

ax.set(
    xlabel="Native Compilation Size (KiB)",
    ylabel="Casm Compilation Size (KiB)",
    title="Compilation Size Correlation",
)

ax.legend()

plt.show()
76 changes: 76 additions & 0 deletions plotting/plot_compilation_memory_trend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from argparse import ArgumentParser

# Command-line interface: two positional arguments, the paths to the native
# and VM logs files. The string given to ArgumentParser is used as the
# program name in the usage/help output.
argument_parser = ArgumentParser('Stress Test Plotter')
argument_parser.add_argument("native_logs_path")
argument_parser.add_argument("vm_logs_path")
arguments = argument_parser.parse_args()

# NOTE(review): the plotting libraries are imported only after the arguments
# have been parsed -- presumably so usage errors are reported without first
# loading them; confirm before moving these imports to the top of the file.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read each log file as line-delimited JSON into a Series (one entry per line).
dataset_native = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
dataset_vm = pd.read_json(arguments.vm_logs_path, lines=True, typ="series")

def canonicalize_compilation_time(event):
    """Turn a "contract compilation finished" log event into a plot record.

    Args:
        event: One parsed log entry (a dict). It is expected to carry a
            "fields" mapping with a "message" and, for compilation events,
            a "size" value.

    Returns:
        A dict with the "class hash" and the "length" (divided by 1024) of
        the enclosing "contract compilation" span, plus the event's "size"
        (divided by 1024), or None when the event is not a compilation record
        or no matching span is found.
    """
    # Events without "fields" or without a "message" cannot be compilation
    # records; skip them instead of letting a KeyError abort the whole run.
    fields = event.get("fields") or {}
    message = fields.get("message") or ""
    if "contract compilation finished" not in message:
        return None

    compilation_span = find_span(event, "contract compilation")
    if compilation_span is None:
        return None

    return {
        "class hash": compilation_span["class_hash"],
        "length": compilation_span["length"] / 1024,
        "size": fields["size"] / 1024,
    }

def find_span(event, name):
    """Return the first span of *event* whose name contains *name*.

    Args:
        event: One parsed log entry (a dict), optionally holding a "spans"
            list of dicts that each have a "name".
        name: Substring to look for in each span's name.

    Returns:
        The first matching span dict, or None when nothing matches or the
        event has no "spans" entry.
    """
    # A missing (or null) "spans" entry is treated as an empty list rather
    # than raising KeyError, so callers get the usual None result.
    for span in event.get("spans") or ():
        if name in span["name"]:
            return span
    return None

def format_hash(class_hash):
    """Abbreviate a class hash as "0x", its first six characters, then "..."."""
    leading_chars = class_hash[:6]
    return "0x{}...".format(leading_chars)


# Reduce each log to its compilation records and expand them into columns.
native_records = dataset_native.apply(canonicalize_compilation_time).dropna()
dataset_native = native_records.apply(pd.Series)
vm_records = dataset_vm.apply(canonicalize_compilation_time).dropna()
dataset_vm = vm_records.apply(pd.Series)

figure, ax = plt.subplots()

sns.set_color_codes("bright")

# Native results are fitted separately on each side of the 1000 size
# threshold; the Casm results are fitted as a single series.
native_size = dataset_native["size"]
for label, subset in (
    ("Native (<1000)", dataset_native[native_size < 1000]),
    ("Native (>=1000)", dataset_native[native_size >= 1000]),
    ("Casm", dataset_vm),
):
    sns.regplot(x="length", y="size", label=label, data=subset, ax=ax)

ax.set(
    xlabel="Sierra size (KiB)",
    ylabel="Compiled size (KiB)",
    title="Compilation Size Trend",
)
# Show tick labels as plain numbers rather than in scientific notation.
ax.ticklabel_format(style="plain")

ax.legend()

plt.show()
47 changes: 47 additions & 0 deletions plotting/plot_compilation_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from argparse import ArgumentParser

# Command-line interface: one positional argument, the path to the native
# logs file. The string given to ArgumentParser is used as the program name
# in the usage/help output.
argument_parser = ArgumentParser('Stress Test Plotter')
argument_parser.add_argument("native_logs_path")
arguments = argument_parser.parse_args()

# NOTE(review): the plotting libraries are imported only after the arguments
# have been parsed -- presumably so usage errors are reported without first
# loading them; confirm before moving these imports to the top of the file.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the log file as line-delimited JSON into a Series (one entry per line).
dataset = pd.read_json(arguments.native_logs_path, lines=True, typ="series")

def canonicalize_compilation_time(event):
    """Turn a "contract compilation finished" log event into a plot record.

    Args:
        event: One parsed log entry (a dict). It is expected to carry a
            "fields" mapping with a "message" and, for compilation events,
            a "time" value.

    Returns:
        A dict with the "class hash" of the enclosing "contract compilation"
        span and the event's "time" converted to float (presumably
        milliseconds, per the plot's axis label), or None when the event is
        not a compilation record or no matching span is found.
    """
    # Keep only "contract compilation finished" logs. Events without
    # "fields" or without a "message" cannot be compilation records; skip
    # them instead of letting a KeyError abort the whole run.
    fields = event.get("fields") or {}
    message = fields.get("message") or ""
    if "contract compilation finished" not in message:
        return None

    compilation_span = find_span(event, "contract compilation")
    if compilation_span is None:
        return None

    return {
        "class hash": compilation_span["class_hash"],
        "time": float(fields["time"]),
    }

def find_span(event, name):
    """Return the first span of *event* whose name contains *name*.

    Args:
        event: One parsed log entry (a dict), optionally holding a "spans"
            list of dicts that each have a "name".
        name: Substring to look for in each span's name.

    Returns:
        The first matching span dict, or None when nothing matches or the
        event has no "spans" entry.
    """
    # A missing (or null) "spans" entry is treated as an empty list rather
    # than raising KeyError, so callers get the usual None result.
    for span in event.get("spans") or ():
        if name in span["name"]:
            return span
    return None

def format_hash(class_hash):
    """Abbreviate a class hash as "0x", its first six characters, then "..."."""
    leading_chars = class_hash[:6]
    return "0x{}...".format(leading_chars)

# Keep only the compilation records, then expand each record dict into columns.
compilation_records = dataset.apply(canonicalize_compilation_time).dropna()
dataset = compilation_records.apply(pd.Series)

figure, ax = plt.subplots()

sns.set_color_codes("bright")
# One bar per class hash; format_hash shortens the hashes used as labels.
sns.barplot(data=dataset, x="time", y="class hash", formatter=format_hash, ax=ax)  # type: ignore

ax.set(
    xlabel="Compilation Time (ms)",
    ylabel="Class Hash",
    title="Native Compilation Time",
)

plt.show()
Loading

0 comments on commit 38d05a8

Please sign in to comment.