Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for profiling benchmarks #134

Merged
merged 6 commits into from
Jul 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ Usage
[--duplicate DUPLICATE]
[-s SETUP]
[--teardown TEARDOWN]
[--profile PROFILE]
stmt [stmt ...]

Options:
Expand All @@ -512,6 +513,7 @@ Options:
* ``--duplicate=DUPLICATE``: Duplicate statements (``stmt`` statements, not
``SETUP``) to reduce the overhead of the outer loop and multiply
inner loops by DUPLICATE (see ``--inner-loops`` option).
* ``--profile=PROFILE``: Run the benchmark inside the cProfile profiler and output to the given file. This is a convenient way to profile a specific benchmark, but it will make the actual benchmark timings much less accurate.

.. note::
timeit ``-n`` (number) and ``-r`` (repeat) options become ``-l`` (loops) and
Expand Down
3 changes: 3 additions & 0 deletions pyperf/_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe):
if args.track_memory:
cmd.append('--track-memory')

if args.profile:
cmd.extend(['--profile', args.profile])

if self.runner._add_cmdline_args:
self.runner._add_cmdline_args(cmd, args)

Expand Down
41 changes: 38 additions & 3 deletions pyperf/_process_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import subprocess
import sys
import tempfile
import time

try:
Expand All @@ -35,11 +36,30 @@ def get_max_rss():
return 0


def bench_process(loops, args, kw):
def merge_profile_stats_files(src, dst):
"""
Merging one existing pstats file into another.
"""
import pstats
if os.path.isfile(dst):
src_stats = pstats.Stats(src)
dst_stats = pstats.Stats(dst)
dst_stats.add(src_stats)
dst_stats.dump_stats(dst)
os.unlink(src)
else:
os.rename(src, dst)


def bench_process(loops, args, kw, profile_filename=None):
max_rss = 0
range_it = range(loops)
start_time = time.perf_counter()

if profile_filename:
temp_profile_filename = tempfile.mktemp()
args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:]

for loop in range_it:
start_rss = get_max_rss()

Expand All @@ -51,13 +71,20 @@ def bench_process(loops, args, kw):
if exitcode != 0:
print("Command failed with exit code %s" % exitcode,
file=sys.stderr)
if profile_filename:
os.unlink(temp_profile_filename)
sys.exit(exitcode)

proc = None

rss = get_max_rss() - start_rss
max_rss = max(max_rss, rss)

if profile_filename:
merge_profile_stats_files(
temp_profile_filename, profile_filename
)

dt = time.perf_counter() - start_time
return (dt, max_rss)

Expand All @@ -70,10 +97,18 @@ def main():
sys.exit(1)

if len(sys.argv) < 3:
print("Usage: %s %s loops program [arg1 arg2 ...]"
print("Usage: %s %s loops program [arg1 arg2 ...] [--profile profile]"
% (os.path.basename(sys.executable), __file__))
sys.exit(1)

if "--profile" in sys.argv:
profile_idx = sys.argv.index("--profile")
profile_filename = sys.argv[profile_idx + 1]
del sys.argv[profile_idx]
del sys.argv[profile_idx]
else:
profile_filename = None

loops = int(sys.argv[1])
args = sys.argv[2:]

Expand All @@ -88,7 +123,7 @@ def main():
kw['stdout'] = devnull
kw['stderr'] = subprocess.STDOUT

dt, max_rss = bench_process(loops, args, kw)
dt, max_rss = bench_process(loops, args, kw, profile_filename)

if devnull is not None:
devnull.close()
Expand Down
55 changes: 51 additions & 4 deletions pyperf/_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
set_highest_priority)
from pyperf._formatter import format_timedelta
from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable,
WritePipe, get_python_names)
WritePipe, get_python_names,
merge_profile_stats)
from pyperf._worker import WorkerProcessTask


Expand Down Expand Up @@ -48,6 +49,19 @@ def parse_python_names(names):
return parts


def profiling_wrapper(func):
"""
Wrap a function to collect profiling.
"""
import cProfile
profiler = cProfile.Profile()

def profiling_func(*args):
return profiler.runcall(func, *args)

return profiler, profiling_func


class CLIError(Exception):
pass

Expand Down Expand Up @@ -222,6 +236,11 @@ def __init__(self, values=None, warmups=None, processes=None,
'PYTHON as CHANGED_NAME '
'and REF_PYTHON as REF_NAME in results')

parser.add_argument('--profile',
type=str,
help='Collect profile data using cProfile '
'and output to the given file.')

memory = parser.add_mutually_exclusive_group()
memory.add_argument('--tracemalloc', action="store_true",
help='Trace memory allocations using tracemalloc')
Expand Down Expand Up @@ -454,12 +473,21 @@ def bench_time_func(self, name, time_func, *args, **kwargs):
if not self._check_worker_task():
return None

if self.args.profile:
profiler, time_func = profiling_wrapper(time_func)

def task_func(task, loops):
return time_func(loops, *args)

task = WorkerProcessTask(self, name, task_func, metadata)

task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def bench_func(self, name, func, *args, **kwargs):
""""Benchmark func(*args)."""
Expand All @@ -474,6 +502,9 @@ def bench_func(self, name, func, *args, **kwargs):
if args:
func = functools.partial(func, *args)

if self.args.profile:
profiler, func = profiling_wrapper(func)

def task_func(task, loops):
# use fast local variables
local_timer = time.perf_counter
Expand All @@ -494,7 +525,12 @@ def task_func(task, loops):

task = WorkerProcessTask(self, name, task_func, metadata)
task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def bench_async_func(self, name, func, *args, **kwargs):
"""Benchmark await func(*args)"""
Expand All @@ -509,6 +545,9 @@ def bench_async_func(self, name, func, *args, **kwargs):
if args:
func = functools.partial(func, *args)

if self.args.profile:
profiler, func = profiling_wrapper(func)

def task_func(task, loops):
if loops != 1:
async def main():
Expand Down Expand Up @@ -549,7 +588,12 @@ async def main():

task = WorkerProcessTask(self, name, task_func, metadata)
task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def timeit(self, name, stmt=None, setup="pass", teardown="pass",
inner_loops=None, duplicate=None, metadata=None, globals=None):
Expand Down Expand Up @@ -671,6 +715,9 @@ def bench_command(self, name, command):
if not self._check_worker_task():
return None

if self.args.profile:
command.extend(["--profile", self.args.profile])

# Use lazy import to limit imports on 'import pyperf'
from pyperf._command import BenchCommandTask
task = BenchCommandTask(self, name, command)
Expand Down
21 changes: 21 additions & 0 deletions pyperf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,24 @@ def geometric_mean(data):
if not data:
raise ValueError("empty data")
return _geometric_mean(data)


def merge_profile_stats(profiler, dst):
"""
Save pstats by merging into an existing file.
"""
import pstats
if os.path.isfile(dst):
try:
src_stats = pstats.Stats(profiler)
except TypeError:
# If no actual stats were collected, a TypeError is raised
# and we don't need to merge anything into the output.
return

dst_stats = pstats.Stats(dst)
dst_stats.add(src_stats)
dst_stats.dump_stats(dst)
else:
profiler.dump_stats(dst)

49 changes: 48 additions & 1 deletion pyperf/tests/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections
import os.path
import pstats
import sys
import tempfile
import textwrap
Expand Down Expand Up @@ -73,7 +74,6 @@ class ClockInfo:
self.assertEqual(bench.get_nrun(), 1)

return Result(runner, bench, stdout)

def test_worker(self):
result = self.exec_runner('--worker', '-l1', '-w1')
self.assertRegex(result.stdout,
Expand All @@ -83,6 +83,53 @@ def test_debug_single_value(self):
result = self.exec_runner('--debug-single-value', '--worker')
self.assertEqual(result.bench.get_nvalue(), 1)

def test_profile_time_func(self):
with tempfile.NamedTemporaryFile('wb+') as tmp:
name = tmp.name
args = ['--worker', '-l1', '-w1', '--profile', name]
runner = self.create_runner(args)

def time_func(loops):
return 1.0

runner.bench_time_func('bench1', time_func)

try:
s = pstats.Stats(name)
if sys.version_info < (3, 9):
assert len(s.stats)
else:
assert len(s.get_stats_profile().func_profiles)
finally:
if os.path.isfile(name):
os.unlink(name)

def test_profile_func(self):
with tempfile.NamedTemporaryFile('wb+') as tmp:
name = tmp.name
args = ['--worker', '-l1', '-w1', '--profile', name]
runner = self.create_runner(args)

def external():
return [1] * 1000

def func():
external()
return 1.0

runner.bench_func('bench1', func)

try:
import pstats
s = pstats.Stats(name)
if sys.version_info < (3, 9):
assert len(s.stats)
else:
assert len(s.get_stats_profile().func_profiles)
finally:
if os.path.isfile(name):
os.unlink(name)

def test_pipe(self):
rpipe, wpipe = create_pipe()
with rpipe:
Expand Down