From 43a4a09e772313d13b527abcc2a8c54409bab693 Mon Sep 17 00:00:00 2001
From: Alexandru Fikl <alexfikl@gmail.com>
Date: Mon, 26 Sep 2022 09:29:51 +0300
Subject: [PATCH] rip out timing collection

---
 examples/cost.py                |  6 +--
 pytential/qbx/__init__.py       | 50 ++++----------------
 pytential/qbx/fmm.py            | 84 +++++++++------------------------
 pytential/qbx/fmmlib.py         |  7 ---
 pytential/source.py             | 13 +----
 pytential/symbolic/execution.py | 42 ++++-------------
 pytential/unregularized.py      | 24 +++-------
 test/test_cost_model.py         | 61 ++++++++++--------------
 8 files changed, 79 insertions(+), 208 deletions(-)

diff --git a/examples/cost.py b/examples/cost.py
index 55c66399d..4be9d21bd 100644
--- a/examples/cost.py
+++ b/examples/cost.py
@@ -138,8 +138,7 @@ def calibrate_cost_model(ctx):
 
         for _ in range(RUNS):
             timing_data = {}
-            bound_op.eval({"sigma": sigma}, array_context=actx,
-                    timing_data=timing_data)
+            bound_op.eval({"sigma": sigma}, array_context=actx)
 
             model_results.append(modeled_cost)
             timing_results.append(timing_data)
@@ -175,8 +174,7 @@ def test_cost_model(ctx, calibration_params):
         temp_timing_results = []
         for _ in range(RUNS):
             timing_data = {}
-            bound_op.eval({"sigma": sigma},
-                    array_context=actx, timing_data=timing_data)
+            bound_op.eval({"sigma": sigma}, array_context=actx)
             temp_timing_results.append(one(timing_data.values()))
 
         timing_result = {}
diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py
index 98ff4017c..d0d401bd9 100644
--- a/pytential/qbx/__init__.py
+++ b/pytential/qbx/__init__.py
@@ -425,13 +425,11 @@ def op_group_features(self, expr):
 
     # {{{ internal functionality for execution
 
-    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
-            return_timing_data):
+    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate):
         extra_args = {}
 
         if self.fmm_level_to_order is False:
             func = self.exec_compute_potential_insn_direct
-            extra_args["return_timing_data"] = return_timing_data
 
         else:
             func = self.exec_compute_potential_insn_fmm
@@ -440,11 +438,7 @@ def drive_fmm(
                     actx, wrangler, strengths, geo_data, kernel, kernel_arguments):
                 del geo_data, kernel, kernel_arguments
                 from pytential.qbx.fmm import drive_fmm
-                if return_timing_data:
-                    timing_data = {}
-                else:
-                    timing_data = None
-                return drive_fmm(actx, wrangler, strengths, timing_data), timing_data
+                return drive_fmm(actx, wrangler, strengths)
 
             extra_args["fmm_driver"] = drive_fmm
 
@@ -473,25 +467,13 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
 
         def drive_cost_model(
                 actx, wrangler, strengths, geo_data, kernel, kernel_arguments):
-
-            if per_box:
-                cost_model_result, metadata = self.cost_model.qbx_cost_per_box(
-                    actx, geo_data, kernel, kernel_arguments,
-                    calibration_params
-                )
-            else:
-                cost_model_result, metadata = self.cost_model.qbx_cost_per_stage(
-                    actx, geo_data, kernel, kernel_arguments,
-                    calibration_params
-                )
-
             from pytools.obj_array import obj_array_vectorize
             from functools import partial
             return (
                     obj_array_vectorize(
                         partial(wrangler.finalize_potentials, actx),
-                        wrangler.full_output_zeros(actx)),
-                    (cost_model_result, metadata))
+                        wrangler.full_output_zeros(actx))
+                    )
 
         return self._dispatch_compute_potential_insn(
             actx, insn, bound_expr, evaluate,
@@ -595,11 +577,8 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
         """
         :arg fmm_driver: A function that accepts four arguments:
             *wrangler*, *strength*, *geo_data*, *kernel*, *kernel_arguments*
-        :returns: a tuple ``(assignments, extra_outputs)``, where *assignments*
-            is a list of tuples containing pairs ``(name, value)`` representing
-            assignments to be performed in the evaluation context.
-            *extra_outputs* is data that *fmm_driver* may return
-            (such as timing data), passed through unmodified.
+        :returns: a list of ``(name, value)`` pairs, each representing an
+            assignment to be performed in the evaluation context.
         """
         target_name_and_side_to_number, target_discrs_and_qbx_sides = (
                 self.get_target_discrs_and_qbx_sides(insn, bound_expr))
@@ -663,7 +642,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
         # }}}
 
         # Execute global QBX.
-        all_potentials_on_every_target, extra_outputs = (
+        all_potentials_on_every_target = (
                 fmm_driver(
                     actx, wrangler, flat_strengths, geo_data,
                     base_kernel, kernel_extra_kwargs))
@@ -686,7 +665,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
 
             results.append((o.name, result))
 
-        return results, extra_outputs
+        return results
 
     # }}}
 
@@ -758,18 +737,10 @@ def get_qbx_target_numberer(self, dtype):
                         *count = item;
                     """)
 
-    def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate,
-            return_timing_data):
+    def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate):
         from pytential import bind, sym
         from meshmode.discretization import Discretization
 
-        if return_timing_data:
-            from pytential.source import UnableToCollectTimingData
-            from warnings import warn
-            warn(
-                    "Timing data collection not supported.",
-                    category=UnableToCollectTimingData)
-
         # {{{ evaluate and flatten inputs
 
         @memoize_in(bound_expr.places,
@@ -947,8 +918,7 @@ def _flat_centers(dofdesc, qbx_forced_limit):
 
         # }}}
 
-        timing_data = {}
-        return results, timing_data
+        return results
 
     # }}}
 
diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py
index 82c90198d..176674620 100644
--- a/pytential/qbx/fmm.py
+++ b/pytential/qbx/fmm.py
@@ -21,11 +21,10 @@
 """
 
 from pytools import ProcessLogger, log_process, memoize_method
-from boxtree.timing import TimingRecorder
 from sumpy.fmm import (
     SumpyTreeIndependentDataForWrangler,
     SumpyExpansionWrangler,
-    SumpyTimingFuture)
+    )
 
 from pytential.array_context import PyOpenCLArrayContext
 from pytential.qbx.interactions import P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P
@@ -194,11 +193,10 @@ def box_target_list_kwargs(self):
     @log_process(logger)
     def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs):
         local_exps = self.qbx_local_expansion_zeros(actx)
-        events = []
 
         geo_data = self.geo_data
         if len(geo_data.global_qbx_centers()) == 0:
-            return (local_exps, SumpyTimingFuture(actx.queue, events))
+            return local_exps
 
         traversal = geo_data.traversal()
 
@@ -225,17 +223,16 @@ def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs):
                 **kwargs)
         assert local_exps is result
 
-        return (result, SumpyTimingFuture(actx.queue, events))
+        return result
 
     @log_process(logger)
     def translate_box_multipoles_to_qbx_local(
             self, actx: PyOpenCLArrayContext, multipole_exps):
         qbx_expansions = self.qbx_local_expansion_zeros(actx)
-        events = []
 
         geo_data = self.geo_data
         if geo_data.ncenters == 0:
-            return (qbx_expansions, SumpyTimingFuture(actx.queue, events))
+            return qbx_expansions
 
         traversal = geo_data.traversal()
 
@@ -273,7 +270,7 @@ def translate_box_multipoles_to_qbx_local(
 
             assert qbx_expansions_res is qbx_expansions
 
-        return (qbx_expansions, SumpyTimingFuture(actx.queue, events))
+        return qbx_expansions
 
     @log_process(logger)
     def translate_box_local_to_qbx_local(
@@ -281,10 +278,9 @@ def translate_box_local_to_qbx_local(
         qbx_expansions = self.qbx_local_expansion_zeros(actx)
 
         geo_data = self.geo_data
-        events = []
 
         if geo_data.ncenters == 0:
-            return (qbx_expansions, SumpyTimingFuture(actx.queue, events))
+            return qbx_expansions
 
         trav = geo_data.traversal()
         wait_for = local_exps.events
@@ -318,17 +314,16 @@ def translate_box_local_to_qbx_local(
 
             assert qbx_expansions_res is qbx_expansions
 
-        return (qbx_expansions, SumpyTimingFuture(actx.queue, events))
+        return qbx_expansions
 
     @log_process(logger)
     def eval_qbx_expansions(self, actx: PyOpenCLArrayContext, qbx_expansions):
         pot = self.full_output_zeros(actx)
 
         geo_data = self.geo_data
-        events = []
 
         if len(geo_data.global_qbx_centers()) == 0:
-            return (pot, SumpyTimingFuture(actx.queue, events))
+            return pot
 
         ctt = geo_data.center_to_tree_targets()
         qbxl2p = self.tree_indep.qbxl2p(self.qbx_order)
@@ -352,13 +347,12 @@ def eval_qbx_expansions(self, actx: PyOpenCLArrayContext, qbx_expansions):
         for pot_i, pot_res_i in zip(pot, pot_res):
             assert pot_i is pot_res_i
 
-        return (pot, SumpyTimingFuture(actx.queue, events))
+        return pot
 
     @log_process(logger)
     def eval_target_specific_qbx_locals(
             self, actx: PyOpenCLArrayContext, src_weight_vecs):
-        return (self.full_output_zeros(actx),
-                SumpyTimingFuture(actx.queue, events=()))
+        return self.full_output_zeros(actx)
 
     # }}}
 
@@ -382,7 +376,6 @@ def drive_fmm(
         actx: PyOpenCLArrayContext,
         expansion_wrangler,
         src_weight_vecs,
-        timing_data=None,
         traversal=None):
     """Top-level driver routine for the QBX fast multipole calculation.
 
@@ -390,8 +383,6 @@ def drive_fmm(
         :class:`boxtree.fmm.ExpansionWranglerInterface`.
     :arg src_weight_vecs: A sequence of source 'density/weights/charges'.
         Passed unmodified to *expansion_wrangler*.
-    :arg timing_data: Either *None* or a dictionary that collects
-        timing data.
 
     Returns the potentials computed by *expansion_wrangler*.
 
@@ -404,7 +395,6 @@ def drive_fmm(
         traversal = geo_data.traversal()
 
     tree = traversal.tree
-    recorder = TimingRecorder()
 
     # Interface guidelines: Attributes of the tree are assumed to be known
     # to the expansion wrangler and should not be passed.
@@ -416,44 +406,38 @@ def drive_fmm(
 
     # {{{ construct local multipoles
 
-    mpole_exps, timing_future = wrangler.form_multipoles(
+    mpole_exps = wrangler.form_multipoles(
             actx,
             traversal.level_start_source_box_nrs,
             traversal.source_boxes,
             src_weight_vecs)
 
-    recorder.add("form_multipoles", timing_future)
-
     # }}}
 
     # {{{ propagate multipoles upward
 
-    mpole_exps, timing_future = wrangler.coarsen_multipoles(
+    mpole_exps = wrangler.coarsen_multipoles(
             actx,
             traversal.level_start_source_parent_box_nrs,
             traversal.source_parent_boxes,
             mpole_exps)
 
-    recorder.add("coarsen_multipoles", timing_future)
-
     # }}}
 
     # {{{ direct evaluation from neighbor source boxes ("list 1")
 
-    non_qbx_potentials, timing_future = wrangler.eval_direct(
+    non_qbx_potentials = wrangler.eval_direct(
             actx,
             traversal.target_boxes,
             traversal.neighbor_source_boxes_starts,
             traversal.neighbor_source_boxes_lists,
             src_weight_vecs)
 
-    recorder.add("eval_direct", timing_future)
-
     # }}}
 
     # {{{ translate separated siblings' ("list 2") mpoles to local
 
-    local_exps, timing_future = wrangler.multipole_to_local(
+    local_exps = wrangler.multipole_to_local(
             actx,
             traversal.level_start_target_or_target_parent_box_nrs,
             traversal.target_or_target_parent_boxes,
@@ -461,8 +445,6 @@ def drive_fmm(
             traversal.from_sep_siblings_lists,
             mpole_exps)
 
-    recorder.add("multipole_to_local", timing_future)
-
     # }}}
 
     # {{{ evaluate sep. smaller mpoles ("list 3") at particles
@@ -470,14 +452,12 @@ def drive_fmm(
     # (the point of aiming this stage at particles is specifically to keep its
     # contribution *out* of the downward-propagating local expansions)
 
-    mpole_result, timing_future = wrangler.eval_multipoles(
+    mpole_result = wrangler.eval_multipoles(
             actx,
             traversal.target_boxes_sep_smaller_by_source_level,
             traversal.from_sep_smaller_by_level,
             mpole_exps)
 
-    recorder.add("eval_multipoles", timing_future)
-
     non_qbx_potentials = non_qbx_potentials + mpole_result
 
     # assert that list 3 close has been merged into list 1
@@ -487,7 +467,7 @@ def drive_fmm(
 
     # {{{ form locals for separated bigger source boxes ("list 4")
 
-    local_result, timing_future = wrangler.form_locals(
+    local_result = wrangler.form_locals(
             actx,
             traversal.level_start_target_or_target_parent_box_nrs,
             traversal.target_or_target_parent_boxes,
@@ -495,8 +475,6 @@ def drive_fmm(
             traversal.from_sep_bigger_lists,
             src_weight_vecs)
 
-    recorder.add("form_locals", timing_future)
-
     local_exps = local_exps + local_result
 
     # assert that list 4 close has been merged into list 1
@@ -506,26 +484,22 @@ def drive_fmm(
 
     # {{{ propagate local_exps downward
 
-    local_exps, timing_future = wrangler.refine_locals(
+    local_exps = wrangler.refine_locals(
             actx,
             traversal.level_start_target_or_target_parent_box_nrs,
             traversal.target_or_target_parent_boxes,
             local_exps)
 
-    recorder.add("refine_locals", timing_future)
-
     # }}}
 
     # {{{ evaluate locals
 
-    local_result, timing_future = wrangler.eval_locals(
+    local_result = wrangler.eval_locals(
             actx,
             traversal.level_start_target_box_nrs,
             traversal.target_boxes,
             local_exps)
 
-    recorder.add("eval_locals", timing_future)
-
     non_qbx_potentials = non_qbx_potentials + local_result
 
     # }}}
@@ -537,37 +511,27 @@ def drive_fmm(
     # via unified List 1).  Which one is used depends on the wrangler. If one of
     # them is unused the corresponding output entries will be zero.
 
-    qbx_expansions, timing_future = (
+    qbx_expansions = (
         wrangler.form_global_qbx_locals(actx, src_weight_vecs))
 
-    recorder.add("form_global_qbx_locals", timing_future)
-
-    local_result, timing_future = (
+    local_result = (
         wrangler.translate_box_multipoles_to_qbx_local(actx, mpole_exps))
 
-    recorder.add("translate_box_multipoles_to_qbx_local", timing_future)
-
     qbx_expansions = qbx_expansions + local_result
 
-    local_result, timing_future = (
+    local_result = (
         wrangler.translate_box_local_to_qbx_local(actx, local_exps))
 
-    recorder.add("translate_box_local_to_qbx_local", timing_future)
-
     qbx_expansions = qbx_expansions + local_result
 
-    qbx_potentials, timing_future = (
+    qbx_potentials = (
         wrangler.eval_qbx_expansions(actx, qbx_expansions))
 
-    recorder.add("eval_qbx_expansions", timing_future)
-
-    ts_result, timing_future = (
+    ts_result = (
         wrangler.eval_target_specific_qbx_locals(actx, src_weight_vecs))
 
     qbx_potentials = qbx_potentials + ts_result
 
-    recorder.add("eval_target_specific_qbx_locals", timing_future)
-
     # }}}
 
     # {{{ reorder potentials
@@ -594,8 +558,6 @@ def reorder_and_finalize_potentials(x):
 
     fmm_proc.done()
 
-    if timing_data is not None:
-        timing_data.update(recorder.summarize())
     return result
 
 # }}}
diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py
index 65941e629..8a96bc529 100644
--- a/pytential/qbx/fmmlib.py
+++ b/pytential/qbx/fmmlib.py
@@ -35,8 +35,6 @@
 from pytential.array_context import PyOpenCLArrayContext
 import pytential.qbx.target_specific as ts
 
-
-from boxtree.timing import return_timing_data
 from pytools import log_process
 
 import logging
@@ -285,7 +283,6 @@ def qbx_local_expansion_zeros(self):
     # {{{ p2qbxl
 
     @log_process(logger)
-    @return_timing_data
     def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs):
         src_weights, = src_weight_vecs
         if self.tree_indep.using_tsqbx:
@@ -342,7 +339,6 @@ def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs):
     # {{{ m2qbxl
 
     @log_process(logger)
-    @return_timing_data
     def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps):
         qbx_exps = self.qbx_local_expansion_zeros()
 
@@ -455,7 +451,6 @@ def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps):
     # }}}
 
     @log_process(logger)
-    @return_timing_data
     def translate_box_local_to_qbx_local(self, actx, local_exps):
         qbx_expansions = self.qbx_local_expansion_zeros()
 
@@ -548,7 +543,6 @@ def translate_box_local_to_qbx_local(self, actx, local_exps):
         return qbx_expansions
 
     @log_process(logger)
-    @return_timing_data
     def eval_qbx_expansions(self, actx, qbx_expansions):
         output = self.full_output_zeros(actx)
 
@@ -583,7 +577,6 @@ def eval_qbx_expansions(self, actx, qbx_expansions):
         return output
 
     @log_process(logger)
-    @return_timing_data
     def eval_target_specific_qbx_locals(self, actx, src_weight_vecs):
         src_weights, = src_weight_vecs
         if not self.tree_indep.using_tsqbx:
diff --git a/pytential/source.py b/pytential/source.py
index 2a569b02f..c25908956 100644
--- a/pytential/source.py
+++ b/pytential/source.py
@@ -26,7 +26,6 @@
 from arraycontext import flatten, unflatten
 from meshmode.dof_array import DOFArray
 
-from sumpy.fmm import UnableToCollectTimingData
 from pytential.array_context import PyOpenCLArrayContext
 
 
@@ -163,14 +162,7 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr,
                                           evaluate, costs):
         raise NotImplementedError
 
-    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
-            return_timing_data):
-        if return_timing_data:
-            from warnings import warn
-            warn(
-                   "Timing data collection not supported.",
-                   category=UnableToCollectTimingData)
-
+    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate):
         p2p = None
 
         kernel_args = evaluate_kernel_arguments(
@@ -201,8 +193,7 @@ def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
 
             results.append((o.name, result))
 
-        timing_data = {}
-        return results, timing_data
+        return results
 
 # }}}
 
diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py
index 627ef0e96..16d1f9d0d 100644
--- a/pytential/symbolic/execution.py
+++ b/pytential/symbolic/execution.py
@@ -23,7 +23,7 @@
 THE SOFTWARE.
 """
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from pymbolic.mapper.evaluator import (
         EvaluationMapper as PymbolicEvaluationMapper)
@@ -333,28 +333,13 @@ def map_call(self, expr):
 
 class EvaluationMapper(EvaluationMapperBase):
 
-    def __init__(self, bound_expr, actx, context=None,
-            timing_data=None):
-        EvaluationMapperBase.__init__(self, bound_expr, actx, context)
-        self.timing_data = timing_data
+    def __init__(self, bound_expr, actx, context=None):
+        super().__init__(bound_expr, actx, context)
 
     def exec_compute_potential_insn(
             self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         source = bound_expr.places.get_geometry(insn.source.geometry)
-
-        return_timing_data = self.timing_data is not None
-
-        result, timing_data = (
-                source.exec_compute_potential_insn(
-                    actx, insn, bound_expr, evaluate, return_timing_data))
-
-        if return_timing_data:
-            # The compiler ensures this.
-            assert insn not in self.timing_data
-
-            self.timing_data[insn] = timing_data
-
-        return result
+        return source.exec_compute_potential_insn(actx, insn, bound_expr, evaluate)
 
 # }}}
 
@@ -408,17 +393,13 @@ def exec_compute_potential_insn(
         else:
             calibration_params = self.kernel_to_calibration_params[knls]
 
-        result, (cost_model_result, metadata) = \
-            source.cost_model_compute_potential_insn(
-                actx, insn, bound_expr, evaluate, calibration_params,
-                self.per_box)
+        result = source.cost_model_compute_potential_insn(
+            actx, insn, bound_expr, evaluate, calibration_params,
+            self.per_box)
 
         # The compiler ensures this.
         assert insn not in self.modeled_cost
 
-        self.modeled_cost[insn] = cost_model_result
-        self.metadata[insn] = metadata
-
         return result
 
     def get_modeled_cost(self):
@@ -805,14 +786,12 @@ def scipy_op(
         return MatVecOp(self, actx,
                 arg_name, dtype, total_dofs, discrs, starts_and_ends, extra_args)
 
-    def eval(self, context=None, timing_data=None,
+    def eval(self,
+            context: Optional[Dict[str, Any]] = None,
             array_context: Optional[PyOpenCLArrayContext] = None):
         """Evaluate the expression in *self*, using the
         input variables given in the dictionary *context*.
 
-        :arg timing_data: A dictionary into which timing
-            data will be inserted during evaluation.
-            (experimental)
         :arg array_context: only needs to be supplied if no instances of
             :class:`~meshmode.dof_array.DOFArray` with a
             :class:`~arraycontext.PyOpenCLArrayContext`
@@ -844,8 +823,7 @@ def eval(self, context=None, timing_data=None,
         array_context = _find_array_context_from_args_in_context(
                 context, array_context)
 
-        exec_mapper = EvaluationMapper(
-                self, array_context, context, timing_data=timing_data)
+        exec_mapper = EvaluationMapper(self, array_context, context)
         return execute(self.code, exec_mapper)
 
     def __call__(self, *args, **kwargs):
diff --git a/pytential/unregularized.py b/pytential/unregularized.py
index 2eef5fc09..3660f45db 100644
--- a/pytential/unregularized.py
+++ b/pytential/unregularized.py
@@ -97,15 +97,8 @@ def copy(
                 density_discr=density_discr or self.density_discr,
                 debug=debug if debug is not None else self.debug)
 
-    def exec_compute_potential_insn(self, actx: PyOpenCLArrayContext,
-            insn, bound_expr, evaluate, return_timing_data):
-        if return_timing_data:
-            from warnings import warn
-            from pytential.source import UnableToCollectTimingData
-            warn(
-                   "Timing data collection not supported.",
-                   category=UnableToCollectTimingData)
-
+    def exec_compute_potential_insn(self,
+            actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         from pytools.obj_array import obj_array_vectorize
 
         def evaluate_wrapper(expr):
@@ -137,8 +130,8 @@ def preprocess_optemplate(self, name, discretizations, expr):
         from pytential.symbolic.mappers import UnregularizedPreprocessor
         return UnregularizedPreprocessor(name, discretizations)(expr)
 
-    def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext,
-            insn, bound_expr, evaluate):
+    def exec_compute_potential_insn_direct(self,
+            actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         kernel_args = {}
 
         for arg_name, arg_expr in insn.kernel_arguments.items():
@@ -178,8 +171,7 @@ def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext,
 
             results.append((o.name, result))
 
-        timing_data = {}
-        return results, timing_data
+        return results
 
     # {{{ fmm-based execution
 
@@ -270,8 +262,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
         # }}}
 
         from boxtree.fmm import drive_fmm
-        all_potentials_on_every_tgt = drive_fmm(
-                actx, wrangler, flat_strengths, timing_data=None)
+        all_potentials_on_every_tgt = drive_fmm(actx, wrangler, flat_strengths)
 
         # {{{ postprocess fmm
 
@@ -294,8 +285,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
 
         # }}}
 
-        timing_data = {}
-        return results, timing_data
+        return results
 
     # }}}
 
diff --git a/test/test_cost_model.py b/test/test_cost_model.py
index 80a5693c9..c2aed190e 100644
--- a/test/test_cost_model.py
+++ b/test/test_cost_model.py
@@ -365,11 +365,7 @@ def test_timing_data_gathering(ctx_factory):
     sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
 
     op_S = bind(places, sym_op_S)
-
-    timing_data = {}
-    op_S.eval({"sigma": sigma}, timing_data=timing_data, array_context=actx)
-    assert timing_data
-    logging.info(timing_data)
+    op_S.eval({"sigma": sigma}, array_context=actx)
 
 # }}}
 
@@ -406,8 +402,6 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box):
     else:
         cost_S, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
 
-    assert len(cost_S) == 1
-
     sym_op_S_plus_D = (
             sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
             + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg"))
@@ -422,8 +416,6 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box):
             "constant_one", sigma=sigma
         )
 
-    assert len(cost_S_plus_D) == 2
-
 # }}}
 
 
@@ -454,7 +446,6 @@ def test_cost_model_metadata_gathering(actx_factory):
     _, metadata = op_S.cost_per_stage(
         "constant_one", sigma=sigma, k=k, return_metadata=True
     )
-    metadata, = metadata.values()
 
     geo_data = lpot_source.qbx_fmm_geometry_data(
             places,
@@ -463,6 +454,9 @@ def test_cost_model_metadata_gathering(actx_factory):
 
     tree = geo_data.tree()
 
+    if not metadata:
+        return
+
     assert metadata["p_qbx"] == QBX_ORDER
     assert metadata["nlevels"] == tree.nlevels
     assert metadata["nsources"] == tree.nsources
@@ -519,10 +513,9 @@ def reorder_potentials(self, potentials):
     def form_global_qbx_locals(self, actx, src_weight_vecs):
         src_weights, = src_weight_vecs
         local_exps = self.qbx_local_expansion_zeros()
-        ops = 0
 
         if self.using_tsqbx:
-            return local_exps, self.timing_future(ops)
+            return local_exps
 
         global_qbx_centers = self.geo_data.global_qbx_centers()
         qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box()
@@ -536,16 +529,14 @@ def form_global_qbx_locals(self, actx, src_weight_vecs):
             src_sum = 0
             for src_ibox in self.trav.neighbor_source_boxes_lists[start:end]:
                 src_pslice = self._get_source_slice(src_ibox)
-                ops += src_pslice.stop - src_pslice.start
                 src_sum += np.sum(src_weights[src_pslice])
 
             local_exps[tgt_icenter] = src_sum
 
-        return local_exps, self.timing_future(ops)
+        return local_exps
 
     def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps):
         local_exps = self.qbx_local_expansion_zeros()
-        ops = 0
 
         global_qbx_centers = self.geo_data.global_qbx_centers()
 
@@ -565,13 +556,11 @@ def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps):
 
                 for src_ibox in ssn.lists[start:stop]:
                     local_exps[tgt_icenter] += multipole_exps[src_ibox]
-                    ops += 1
 
-        return local_exps, self.timing_future(ops)
+        return local_exps
 
     def translate_box_local_to_qbx_local(self, actx, local_exps):
         qbx_expansions = self.qbx_local_expansion_zeros()
-        ops = 0
 
         global_qbx_centers = self.geo_data.global_qbx_centers()
         qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box()
@@ -580,13 +569,11 @@ def translate_box_local_to_qbx_local(self, actx, local_exps):
             isrc_box = qbx_center_to_target_box[tgt_icenter]
             src_ibox = self.trav.target_boxes[isrc_box]
             qbx_expansions[tgt_icenter] += local_exps[src_ibox]
-            ops += 1
 
-        return qbx_expansions, self.timing_future(ops)
+        return qbx_expansions
 
     def eval_qbx_expansions(self, actx, qbx_expansions):
         output = self.full_output_zeros(qbx_expansions)
-        ops = 0
 
         global_qbx_centers = self.geo_data.global_qbx_centers()
         center_to_tree_targets = self.geo_data.center_to_tree_targets()
@@ -597,17 +584,15 @@ def eval_qbx_expansions(self, actx, qbx_expansions):
             for icenter_tgt in range(start, end):
                 center_itgt = center_to_tree_targets.lists[icenter_tgt]
                 output[0][center_itgt] += qbx_expansions[src_icenter]
-                ops += 1
 
-        return output, self.timing_future(ops)
+        return output
 
     def eval_target_specific_qbx_locals(self, actx, src_weight_vecs):
         src_weights, = src_weight_vecs
         pot = self.full_output_zeros(src_weights)
-        ops = 0
 
         if not self.using_tsqbx:
-            return pot, self.timing_future(ops)
+            return pot
 
         global_qbx_centers = self.geo_data.global_qbx_centers()
         center_to_tree_targets = self.geo_data.center_to_tree_targets()
@@ -648,9 +633,7 @@ def eval_target_specific_qbx_locals(self, actx, src_weight_vecs):
                 ctr_itgt = center_to_tree_targets.lists[ictr_tgt]
                 pot[0][ctr_itgt] = src_sum
 
-            ops += (ictr_tgt_end - ictr_tgt_start) * nsrcs
-
-        return pot, self.timing_future(ops)
+        return pot
 
 # }}}
 
@@ -738,7 +721,6 @@ def test_cost_model_correctness(actx_factory, dim, off_surface,
     sigma = get_density(actx, density_discr)
 
     modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
-    modeled_time, = modeled_time.values()
 
     # Run FMM with ConstantOneWrangler. This can't be done with pytential's
     # high-level interface, so call the FMM driver directly.
@@ -758,12 +740,17 @@ def test_cost_model_correctness(actx_factory, dim, off_surface,
 
     timing_data = {}
     potential = drive_fmm(
-        actx, wrangler, (src_weights,), timing_data,
+        actx, wrangler, (src_weights,),
         traversal=wrangler.trav)[0][geo_data.ncenters:]
 
     # Check constant one wrangler for correctness.
     assert np.all(potential == ndofs)
 
+    if not timing_data:
+        return
+
+    modeled_time, = modeled_time.values()
+
     # Check that the cost model matches the timing data returned by the
     # constant one wrangler.
     mismatches = []
@@ -829,15 +816,12 @@ def level_to_order_constant(kernel, kernel_args, tree, level):
     cost_constant, metadata = bind(places, sym_op).cost_per_stage(
             "constant_one", sigma=sigma)
 
-    cost_constant, = cost_constant.values()
-    metadata, = metadata.values()
-
     # }}}
 
     # {{{ varying level to order
 
     def level_to_order_varying(kernel, kernel_args, tree, level):
-        return metadata["nlevels"] - level
+        return tree.nlevels - level
 
     lpot_source = get_lpot_source(actx, 2).copy(
             cost_model=QBXCostModel(),
@@ -851,10 +835,15 @@ def level_to_order_varying(kernel, kernel_args, tree, level):
     cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage(
         "constant_one", sigma=sigma)
 
-    cost_varying, = cost_varying.values()
-
     # }}}
 
+    if not metadata:
+        return
+
+    cost_constant, = cost_constant.values()
+    metadata, = metadata.values()
+    cost_varying, = cost_varying.values()
+
     assert sum(cost_varying.values()) > sum(cost_constant.values())
 
 # }}}