Merge pull request #54 from lambdaclass/bench-analysis

Improves logging and plots benchmark information
lambdaclass · Oct 29, 2024 · 38d05a8 · 38d05a8
2 parents 4b32b84 + 18df2ea
commit 38d05a8
Show file tree

Hide file tree

Showing 15 changed files with 575 additions and 23 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -20,4 +20,4 @@ blockifier = { git = "https://github.com/lambdaclass/sequencer", rev = "1b1b95ca
 starknet_gateway = { git = "https://github.com/lambdaclass/sequencer", rev = "1b1b95cae7ae07b9bc778443ca75ee18008a6bc8"}
 
 [patch.'https://github.com/lambdaclass/cairo_native']
-cairo-native = { git = "https://github.com/lambdaclass//cairo_native.git", rev = "355c250f37cf0977ef2776b1aae2cb2e87c9da3d" }
+cairo-native = { git = "https://github.com/lambdaclass//cairo_native.git", rev = "ab478323d6aee5e0424712bbde98de443b8cc72f" }
diff --git a/README.md b/README.md
@@ -118,3 +118,25 @@ To compare the outputs, you can use the following scripts. Some of them required
    ```bash
    > ./scripts/delta_state_dumps.sh
    ```
+
+### Plotting
+
+In the `plotting` directory, you can find Python scripts to plot relevant information. Before using them, you must first execute the replay with the `structured_logging` feature and redirect the output to a file. Do this for both Native execution and VM execution.
+
+Make sure to erase the `compiled_programs` directory, then run:
+
+```bash
+cargo run --features benchmark,structured_logging bench-block-range 724000 724000 mainnet 1 | tee native-logs
+cargo run --features benchmark,structured_logging,only_cairo_vm bench-block-range 724000 724000 mainnet 1 | tee vm-logs
+```
+
+Once you have done this, you can use the plotting scripts:
+
+- `python ./plotting/plot_compilation_memory.py native-logs`: Size of the compiled native libraries, by contract class.
+- `python ./plotting/plot_compilation_memory_corr.py native-logs vm-logs`: Size of the compiled native libraries, by the associated Casm contract size.
+- `python ./plotting/plot_compilation_memory_trend.py native-logs vm-logs`: Size of the compiled native and casm contracts, by the sierra contract size.
+- `python ./plotting/plot_compilation_time.py native-logs`: Native compilation time, by contract class.
+- `python ./plotting/plot_compilation_time_trend.py native-logs vm-logs`: Native and Casm compilation time, by the sierra contract size.
+- `python ./plotting/plot_execution_time.py native-logs vm-logs`: Plots the execution time of Native vs VM, by contract class.
+- `python ./plotting/plot_compilation_time_finer.py native-logs`: Native compilation time, with fine-grained stage separation, by contract class.
+
diff --git a/plotting/plot_compilation_memory.py b/plotting/plot_compilation_memory.py
@@ -0,0 +1,47 @@
+from argparse import ArgumentParser
+
+argument_parser = ArgumentParser('Stress Test Plotter')  # the first positional argument of ArgumentParser is `prog`, the name shown in usage/help
+argument_parser.add_argument("native_logs_path")  # required positional argument, used below as the file to read
+arguments = argument_parser.parse_args()
+# NOTE: these imports run only after parse_args() returns, so a usage error or --help exits before they are loaded
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+# lines=True reads one JSON document per line; typ="series" returns a pandas Series instead of a DataFrame
+dataset = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
+
+def canonicalize_compilation_time(event):  # map one log event to a {"class hash", "size"} record, or None to discard it
+    if "contract compilation finished" not in event["fields"]["message"]:  # substring test on the event message
+        return None
+    # the class hash is read from the matching entry of event["spans"], not from the event's own fields
+    compilation_span = find_span(event, "contract compilation")
+    if compilation_span is None:
+        return None
+    # divisor 1024 * 1024: assuming "size" is logged in bytes this yields MiB, matching the axis label — TODO confirm unit
+    return {
+        "class hash": compilation_span["class_hash"],
+        "size": event["fields"]["size"] / (1024 * 1024),
+    }
+
+def find_span(event, name):  # return the first entry of event["spans"] whose "name" contains `name`, else None
+    for span in event["spans"]:  # raises KeyError if the event has no "spans" key
+        if name in span["name"]:  # substring match, not equality
+            return span
+    return None
+
+def format_hash(class_hash):  # shorten a hash to "0x" + its first six characters + "..."; passed to sns.barplot as `formatter` below
+    return f"0x{class_hash[:6]}..."  # NOTE(review): assumes class_hash carries no "0x" prefix of its own — TODO confirm
+
+
+dataset = dataset.apply(canonicalize_compilation_time).dropna().apply(pd.Series)  # dropna() removes events mapped to None; apply(pd.Series) expands each record dict into columns
+
+figure, ax = plt.subplots()
+# categorical "class hash" on y and numeric "size" on x; labels are shortened by format_hash
+sns.set_color_codes("bright")
+sns.barplot(ax=ax, y="class hash", x="size", data=dataset, formatter=format_hash) # type: ignore
+
+ax.set_xlabel("Library Size (MiB)")
+ax.set_ylabel("Class Hash")
+ax.set_title("Library Size by Contract")
+
+plt.show()
diff --git a/plotting/plot_compilation_memory_corr.py b/plotting/plot_compilation_memory_corr.py
@@ -0,0 +1,71 @@
+from argparse import ArgumentParser
+
+argument_parser = ArgumentParser('Stress Test Plotter')  # the first positional argument of ArgumentParser is `prog`, the name shown in usage/help
+argument_parser.add_argument("native_logs_path")  # required positional argument: first log file read below
+argument_parser.add_argument("vm_logs_path")  # required positional argument: second log file read below
+arguments = argument_parser.parse_args()
+# NOTE: these imports run only after parse_args() returns, so a usage error or --help exits before they are loaded
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+# lines=True reads one JSON document per line; typ="series" returns a pandas Series instead of a DataFrame
+dataset_native = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
+dataset_vm = pd.read_json(arguments.vm_logs_path, lines=True, typ="series")
+
+def canonicalize_compilation_time(event):  # map one log event to a {"class hash", "size"} record, or None to discard it
+    if "contract compilation finished" not in event["fields"]["message"]:  # substring test on the event message
+        return None
+    # the class hash is read from the matching entry of event["spans"], not from the event's own fields
+    compilation_span = find_span(event, "contract compilation")
+    if compilation_span is None:
+        return None
+    # divisor 1024: assuming "size" is logged in bytes this yields KiB, matching the axis labels — TODO confirm unit
+    return {
+        "class hash": compilation_span["class_hash"],
+        "size": event["fields"]["size"] / 1024,
+    }
+
+def find_span(event, name):  # return the first entry of event["spans"] whose "name" contains `name`, else None
+    for span in event["spans"]:  # raises KeyError if the event has no "spans" key
+        if name in span["name"]:  # substring match, not equality
+            return span
+    return None
+
+def format_hash(class_hash):  # shorten a hash to "0x" + its first six characters + "..."; not called anywhere in this script
+    return f"0x{class_hash[:6]}..."  # NOTE(review): assumes class_hash carries no "0x" prefix of its own — TODO confirm
+
+
+dataset_native = dataset_native.apply(canonicalize_compilation_time).dropna().apply(pd.Series)  # dropna() removes events mapped to None; apply(pd.Series) expands each record dict into columns
+dataset_vm = dataset_vm.apply(canonicalize_compilation_time).dropna().apply(pd.Series)
+# index both frames by class hash so they can be joined on it
+dataset_native = dataset_native.set_index("class hash")
+dataset_vm = dataset_vm.set_index("class hash")
+# both frames have a "size" column, so the suffixes produce "size_native" and "size_casm"; join() defaults to a left join
+dataset = dataset_native.join(dataset_vm, lsuffix="_native", rsuffix="_casm")
+
+figure, ax = plt.subplots()
+
+sns.set_color_codes("bright")
+# two separate regressions on the same axes, split at a native size of 1000 (same unit as the x axis)
+sns.regplot(
+    x="size_native",
+    y="size_casm",
+    label = "Native (<1000)",
+    data=dataset[dataset["size_native"] < 1000],
+    ax = ax,
+)
+sns.regplot(
+    x="size_native",
+    y="size_casm",
+    label = "Native (>=1000)",
+    data=dataset[dataset["size_native"] >= 1000],
+    ax = ax,
+)
+
+ax.set_xlabel("Native Compilation Size (KiB)")
+ax.set_ylabel("Casm Compilation Size (KiB)")
+ax.set_title("Compilation Size Correlation")
+
+ax.legend()  # shows the `label` given to each regplot call
+
+plt.show()
diff --git a/plotting/plot_compilation_memory_trend.py b/plotting/plot_compilation_memory_trend.py
@@ -0,0 +1,76 @@
+from argparse import ArgumentParser
+
+argument_parser = ArgumentParser('Stress Test Plotter')  # the first positional argument of ArgumentParser is `prog`, the name shown in usage/help
+argument_parser.add_argument("native_logs_path")  # required positional argument: first log file read below
+argument_parser.add_argument("vm_logs_path")  # required positional argument: second log file read below
+arguments = argument_parser.parse_args()
+# NOTE: these imports run only after parse_args() returns, so a usage error or --help exits before they are loaded
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+# lines=True reads one JSON document per line; typ="series" returns a pandas Series instead of a DataFrame
+dataset_native = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
+dataset_vm = pd.read_json(arguments.vm_logs_path, lines=True, typ="series")
+
+def canonicalize_compilation_time(event):  # map one log event to a {"class hash", "length", "size"} record, or None to discard it
+    if "contract compilation finished" not in event["fields"]["message"]:  # substring test on the event message
+        return None
+    # "class_hash" and "length" are read from the matching entry of event["spans"]; "size" comes from the event's own fields
+    compilation_span = find_span(event, "contract compilation")
+    if compilation_span is None:
+        return None
+    # divisor 1024: assuming both values are logged in bytes this yields KiB, matching the axis labels — TODO confirm units
+    return {
+        "class hash": compilation_span["class_hash"],
+        "length": compilation_span["length"] / 1024,
+        "size": event["fields"]["size"] / 1024,
+    }
+
+def find_span(event, name):  # return the first entry of event["spans"] whose "name" contains `name`, else None
+    for span in event["spans"]:  # raises KeyError if the event has no "spans" key
+        if name in span["name"]:  # substring match, not equality
+            return span
+    return None
+
+def format_hash(class_hash):  # shorten a hash to "0x" + its first six characters + "..."; not called anywhere in this script
+    return f"0x{class_hash[:6]}..."  # NOTE(review): assumes class_hash carries no "0x" prefix of its own — TODO confirm
+
+
+dataset_native = dataset_native.apply(canonicalize_compilation_time).dropna().apply(pd.Series)  # dropna() removes events mapped to None; apply(pd.Series) expands each record dict into columns
+dataset_vm = dataset_vm.apply(canonicalize_compilation_time).dropna().apply(pd.Series)
+
+figure, ax = plt.subplots()
+
+sns.set_color_codes("bright")
+# three regressions on the same axes: native rows split at a size of 1000 (same unit as the y axis), plus all VM rows
+sns.regplot(
+    x="length",
+    y="size",
+    label = "Native (<1000)",
+    data=dataset_native[dataset_native["size"] < 1000],
+    ax = ax,
+)
+sns.regplot(
+    x="length",
+    y="size",
+    label = "Native (>=1000)",
+    data=dataset_native[dataset_native["size"] >= 1000],
+    ax = ax,
+)
+sns.regplot(
+    x="length",
+    y="size",
+    label = "Casm",
+    data=dataset_vm,
+    ax = ax,
+)
+
+ax.set_xlabel("Sierra size (KiB)")
+ax.set_ylabel("Compiled size (KiB)")
+ax.set_title("Compilation Size Trend")
+ax.ticklabel_format(style="plain")  # plain tick labels, i.e. no scientific notation
+
+
+ax.legend()  # shows the `label` given to each regplot call
+
+plt.show()
diff --git a/plotting/plot_compilation_time.py b/plotting/plot_compilation_time.py
@@ -0,0 +1,47 @@
+from argparse import ArgumentParser
+
+argument_parser = ArgumentParser('Stress Test Plotter')  # the first positional argument of ArgumentParser is `prog`, the name shown in usage/help
+argument_parser.add_argument("native_logs_path")  # required positional argument, used below as the file to read
+arguments = argument_parser.parse_args()
+# NOTE: these imports run only after parse_args() returns, so a usage error or --help exits before they are loaded
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+# lines=True reads one JSON document per line; typ="series" returns a pandas Series instead of a DataFrame
+dataset = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
+
+def canonicalize_compilation_time(event):  # map one log event to a {"class hash", "time"} record, or None to discard it
+    # keep only events whose message contains "contract compilation finished"
+    if "contract compilation finished" not in event["fields"]["message"]:
+        return None
+    # the class hash is read from the matching entry of event["spans"], not from the event's own fields
+    compilation_span = find_span(event, "contract compilation")
+    if compilation_span is None:
+        return None
+    # float() conversion suggests "time" may be logged as a string; the axis label treats it as milliseconds — TODO confirm
+    return {
+        "class hash": compilation_span["class_hash"],
+        "time": float(event["fields"]["time"]),
+    }
+
+def find_span(event, name):  # return the first entry of event["spans"] whose "name" contains `name`, else None
+    for span in event["spans"]:  # raises KeyError if the event has no "spans" key
+        if name in span["name"]:  # substring match, not equality
+            return span
+    return None
+
+def format_hash(class_hash):  # shorten a hash to "0x" + its first six characters + "..."; passed to sns.barplot as `formatter` below
+    return f"0x{class_hash[:6]}..."  # NOTE(review): assumes class_hash carries no "0x" prefix of its own — TODO confirm
+
+dataset = dataset.apply(canonicalize_compilation_time).dropna().apply(pd.Series)  # dropna() removes events mapped to None; apply(pd.Series) expands each record dict into columns
+
+figure, ax = plt.subplots()
+# categorical "class hash" on y and numeric "time" on x; labels are shortened by format_hash
+sns.set_color_codes("bright")
+sns.barplot(ax=ax, y="class hash", x="time", data=dataset, formatter=format_hash) # type: ignore
+
+ax.set_xlabel("Compilation Time (ms)")
+ax.set_ylabel("Class Hash")
+ax.set_title("Native Compilation Time")
+
+plt.show()