From 43a4a09e772313d13b527abcc2a8c54409bab693 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 26 Sep 2022 09:29:51 +0300 Subject: [PATCH] rip out timing collection --- examples/cost.py | 6 +-- pytential/qbx/__init__.py | 50 ++++---------------- pytential/qbx/fmm.py | 84 +++++++++------------------------ pytential/qbx/fmmlib.py | 7 --- pytential/source.py | 13 +---- pytential/symbolic/execution.py | 42 ++++------------- pytential/unregularized.py | 24 +++------- test/test_cost_model.py | 61 ++++++++++-------------- 8 files changed, 79 insertions(+), 208 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 55c66399d..4be9d21bd 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -138,8 +138,7 @@ def calibrate_cost_model(ctx): for _ in range(RUNS): timing_data = {} - bound_op.eval({"sigma": sigma}, array_context=actx, - timing_data=timing_data) + bound_op.eval({"sigma": sigma}, array_context=actx) model_results.append(modeled_cost) timing_results.append(timing_data) @@ -175,8 +174,7 @@ def test_cost_model(ctx, calibration_params): temp_timing_results = [] for _ in range(RUNS): timing_data = {} - bound_op.eval({"sigma": sigma}, - array_context=actx, timing_data=timing_data) + bound_op.eval({"sigma": sigma}, array_context=actx) temp_timing_results.append(one(timing_data.values())) timing_result = {} diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 98ff4017c..d0d401bd9 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -425,13 +425,11 @@ def op_group_features(self, expr): # {{{ internal functionality for execution - def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate, - return_timing_data): + def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate): extra_args = {} if self.fmm_level_to_order is False: func = self.exec_compute_potential_insn_direct - extra_args["return_timing_data"] = return_timing_data else: func = self.exec_compute_potential_insn_fmm @@ -440,11 +438,7 @@ def drive_fmm( actx, wrangler, strengths, geo_data, kernel, kernel_arguments): del geo_data, kernel, kernel_arguments from pytential.qbx.fmm import drive_fmm - if return_timing_data: - timing_data = {} - else: - timing_data = None - return drive_fmm(actx, wrangler, strengths, timing_data), timing_data + return drive_fmm(actx, wrangler, strengths) extra_args["fmm_driver"] = drive_fmm @@ -473,25 +467,13 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate, def drive_cost_model( actx, wrangler, strengths, geo_data, kernel, kernel_arguments): - - if per_box: - cost_model_result, metadata = self.cost_model.qbx_cost_per_box( - actx, geo_data, kernel, kernel_arguments, - calibration_params - ) - else: - cost_model_result, metadata = self.cost_model.qbx_cost_per_stage( - actx, geo_data, kernel, kernel_arguments, - calibration_params - ) - from pytools.obj_array import obj_array_vectorize from functools import partial return ( obj_array_vectorize( partial(wrangler.finalize_potentials, actx), - wrangler.full_output_zeros(actx)), - (cost_model_result, metadata)) + wrangler.full_output_zeros(actx)) + ) return self._dispatch_compute_potential_insn( actx, insn, bound_expr, evaluate, @@ -595,11 +577,8 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, """ :arg fmm_driver: A function that accepts four arguments: *wrangler*, *strength*, *geo_data*, *kernel*, *kernel_arguments* - :returns: a tuple ``(assignments, extra_outputs)``, where *assignments* - is a list of tuples containing pairs ``(name, value)`` representing - assignments to be performed in the evaluation context. - *extra_outputs* is data that *fmm_driver* may return - (such as timing data), passed through unmodified. + :returns: a list of assignments containing pairs ``(name, value)`` + representing assignments to be performed in the evaluation context. """ target_name_and_side_to_number, target_discrs_and_qbx_sides = ( self.get_target_discrs_and_qbx_sides(insn, bound_expr)) @@ -663,7 +642,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, # }}} # Execute global QBX. - all_potentials_on_every_target, extra_outputs = ( + all_potentials_on_every_target = ( fmm_driver( actx, wrangler, flat_strengths, geo_data, base_kernel, kernel_extra_kwargs)) @@ -686,7 +665,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, results.append((o.name, result)) - return results, extra_outputs + return results # }}} @@ -758,18 +737,10 @@ def get_qbx_target_numberer(self, dtype): *count = item; """) - def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate, - return_timing_data): + def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate): from pytential import bind, sym from meshmode.discretization import Discretization - if return_timing_data: - from pytential.source import UnableToCollectTimingData - from warnings import warn - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData) - # {{{ evaluate and flatten inputs @memoize_in(bound_expr.places, @@ -947,8 +918,7 @@ def _flat_centers(dofdesc, qbx_forced_limit): # }}} - timing_data = {} - return results, timing_data + return results # }}} diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index 82c90198d..176674620 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -21,11 +21,10 @@ """ from pytools import ProcessLogger, log_process, memoize_method -from boxtree.timing import TimingRecorder from sumpy.fmm import ( SumpyTreeIndependentDataForWrangler, SumpyExpansionWrangler, - SumpyTimingFuture) + ) from pytential.array_context import PyOpenCLArrayContext from pytential.qbx.interactions import P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P @@ -194,11 +193,10 @@ def box_target_list_kwargs(self): @log_process(logger) def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): local_exps = self.qbx_local_expansion_zeros(actx) - events = [] geo_data = self.geo_data if len(geo_data.global_qbx_centers()) == 0: - return (local_exps, SumpyTimingFuture(actx.queue, events)) + return local_exps traversal = geo_data.traversal() @@ -225,17 +223,16 @@ def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): **kwargs) assert local_exps is result - return (result, SumpyTimingFuture(actx.queue, events)) + return result @log_process(logger) def translate_box_multipoles_to_qbx_local( self, actx: PyOpenCLArrayContext, multipole_exps): qbx_expansions = self.qbx_local_expansion_zeros(actx) - events = [] geo_data = self.geo_data if geo_data.ncenters == 0: - return (qbx_expansions, SumpyTimingFuture(actx.queue, events)) + return qbx_expansions traversal = geo_data.traversal() @@ -273,7 +270,7 @@ def translate_box_multipoles_to_qbx_local( assert qbx_expansions_res is qbx_expansions - return (qbx_expansions, SumpyTimingFuture(actx.queue, events)) + return qbx_expansions @log_process(logger) def translate_box_local_to_qbx_local( @@ -281,10 +278,9 @@ def translate_box_local_to_qbx_local( qbx_expansions = self.qbx_local_expansion_zeros(actx) geo_data = self.geo_data - events = [] if geo_data.ncenters == 0: - return (qbx_expansions, SumpyTimingFuture(actx.queue, events)) + return qbx_expansions trav = geo_data.traversal() wait_for = local_exps.events @@ -318,17 +314,16 @@ def translate_box_local_to_qbx_local( assert qbx_expansions_res is qbx_expansions - return (qbx_expansions, SumpyTimingFuture(actx.queue, events)) + return qbx_expansions @log_process(logger) def eval_qbx_expansions(self, actx: PyOpenCLArrayContext, qbx_expansions): pot = self.full_output_zeros(actx) geo_data = self.geo_data - events = [] if len(geo_data.global_qbx_centers()) == 0: - return (pot, SumpyTimingFuture(actx.queue, events)) + return pot ctt = geo_data.center_to_tree_targets() qbxl2p = self.tree_indep.qbxl2p(self.qbx_order) @@ -352,13 +347,12 @@ def eval_qbx_expansions(self, actx: PyOpenCLArrayContext, qbx_expansions): for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return (pot, SumpyTimingFuture(actx.queue, events)) + return pot @log_process(logger) def eval_target_specific_qbx_locals( self, actx: PyOpenCLArrayContext, src_weight_vecs): - return (self.full_output_zeros(actx), - SumpyTimingFuture(actx.queue, events=())) + return self.full_output_zeros(actx) # }}} @@ -382,7 +376,6 @@ def drive_fmm( actx: PyOpenCLArrayContext, expansion_wrangler, src_weight_vecs, - timing_data=None, traversal=None): """Top-level driver routine for the QBX fast multipole calculation. @@ -390,8 +383,6 @@ def drive_fmm( :class:`boxtree.fmm.ExpansionWranglerInterface`. :arg src_weight_vecs: A sequence of source 'density/weights/charges'. Passed unmodified to *expansion_wrangler*. - :arg timing_data: Either *None* or a dictionary that collects - timing data. Returns the potentials computed by *expansion_wrangler*. @@ -404,7 +395,6 @@ def drive_fmm( traversal = geo_data.traversal() tree = traversal.tree - recorder = TimingRecorder() # Interface guidelines: Attributes of the tree are assumed to be known # to the expansion wrangler and should not be passed. @@ -416,44 +406,38 @@ def drive_fmm( # {{{ construct local multipoles - mpole_exps, timing_future = wrangler.form_multipoles( + mpole_exps = wrangler.form_multipoles( actx, traversal.level_start_source_box_nrs, traversal.source_boxes, src_weight_vecs) - recorder.add("form_multipoles", timing_future) - # }}} # {{{ propagate multipoles upward - mpole_exps, timing_future = wrangler.coarsen_multipoles( + mpole_exps = wrangler.coarsen_multipoles( actx, traversal.level_start_source_parent_box_nrs, traversal.source_parent_boxes, mpole_exps) - recorder.add("coarsen_multipoles", timing_future) - # }}} # {{{ direct evaluation from neighbor source boxes ("list 1") - non_qbx_potentials, timing_future = wrangler.eval_direct( + non_qbx_potentials = wrangler.eval_direct( actx, traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_weight_vecs) - recorder.add("eval_direct", timing_future) - # }}} # {{{ translate separated siblings' ("list 2") mpoles to local - local_exps, timing_future = wrangler.multipole_to_local( + local_exps = wrangler.multipole_to_local( actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, @@ -461,8 +445,6 @@ def drive_fmm( traversal.from_sep_siblings_lists, mpole_exps) - recorder.add("multipole_to_local", timing_future) - # }}} # {{{ evaluate sep. smaller mpoles ("list 3") at particles @@ -470,14 +452,12 @@ def drive_fmm( # (the point of aiming this stage at particles is specifically to keep its # contribution *out* of the downward-propagating local expansions) - mpole_result, timing_future = wrangler.eval_multipoles( + mpole_result = wrangler.eval_multipoles( actx, traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps) - recorder.add("eval_multipoles", timing_future) - non_qbx_potentials = non_qbx_potentials + mpole_result # assert that list 3 close has been merged into list 1 @@ -487,7 +467,7 @@ def drive_fmm( # {{{ form locals for separated bigger source boxes ("list 4") - local_result, timing_future = wrangler.form_locals( + local_result = wrangler.form_locals( actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, @@ -495,8 +475,6 @@ def drive_fmm( traversal.from_sep_bigger_lists, src_weight_vecs) - recorder.add("form_locals", timing_future) - local_exps = local_exps + local_result # assert that list 4 close has been merged into list 1 @@ -506,26 +484,22 @@ def drive_fmm( # {{{ propagate local_exps downward - local_exps, timing_future = wrangler.refine_locals( + local_exps = wrangler.refine_locals( actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, local_exps) - recorder.add("refine_locals", timing_future) - # }}} # {{{ evaluate locals - local_result, timing_future = wrangler.eval_locals( + local_result = wrangler.eval_locals( actx, traversal.level_start_target_box_nrs, traversal.target_boxes, local_exps) - recorder.add("eval_locals", timing_future) - non_qbx_potentials = non_qbx_potentials + local_result # }}} @@ -537,37 +511,27 @@ def drive_fmm( # via unified List 1). Which one is used depends on the wrangler. If one of # them is unused the corresponding output entries will be zero. - qbx_expansions, timing_future = ( + qbx_expansions = ( wrangler.form_global_qbx_locals(actx, src_weight_vecs)) - recorder.add("form_global_qbx_locals", timing_future) - - local_result, timing_future = ( + local_result = ( wrangler.translate_box_multipoles_to_qbx_local(actx, mpole_exps)) - recorder.add("translate_box_multipoles_to_qbx_local", timing_future) - qbx_expansions = qbx_expansions + local_result - local_result, timing_future = ( + local_result = ( wrangler.translate_box_local_to_qbx_local(actx, local_exps)) - recorder.add("translate_box_local_to_qbx_local", timing_future) - qbx_expansions = qbx_expansions + local_result - qbx_potentials, timing_future = ( + qbx_potentials = ( wrangler.eval_qbx_expansions(actx, qbx_expansions)) - recorder.add("eval_qbx_expansions", timing_future) - - ts_result, timing_future = ( + ts_result = ( wrangler.eval_target_specific_qbx_locals(actx, src_weight_vecs)) qbx_potentials = qbx_potentials + ts_result - recorder.add("eval_target_specific_qbx_locals", timing_future) - # }}} # {{{ reorder potentials @@ -594,8 +558,6 @@ def reorder_and_finalize_potentials(x): fmm_proc.done() - if timing_data is not None: - timing_data.update(recorder.summarize()) return result # }}} diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 65941e629..8a96bc529 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -35,8 +35,6 @@ from pytential.array_context import PyOpenCLArrayContext import pytential.qbx.target_specific as ts - -from boxtree.timing import return_timing_data from pytools import log_process import logging @@ -285,7 +283,6 @@ def qbx_local_expansion_zeros(self): # {{{ p2qbxl @log_process(logger) - @return_timing_data def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): src_weights, = src_weight_vecs if self.tree_indep.using_tsqbx: @@ -342,7 +339,6 @@ def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): # {{{ m2qbxl @log_process(logger) - @return_timing_data def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): qbx_exps = self.qbx_local_expansion_zeros() @@ -455,7 +451,6 @@ def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): # }}} @log_process(logger) - @return_timing_data def translate_box_local_to_qbx_local(self, actx, local_exps): qbx_expansions = self.qbx_local_expansion_zeros() @@ -548,7 +543,6 @@ def translate_box_local_to_qbx_local(self, actx, local_exps): return qbx_expansions @log_process(logger) - @return_timing_data def eval_qbx_expansions(self, actx, qbx_expansions): output = self.full_output_zeros(actx) @@ -583,7 +577,6 @@ def eval_qbx_expansions(self, actx, qbx_expansions): return output @log_process(logger) - @return_timing_data def eval_target_specific_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs if not self.tree_indep.using_tsqbx: diff --git a/pytential/source.py b/pytential/source.py index 2a569b02f..c25908956 100644 --- a/pytential/source.py +++ b/pytential/source.py @@ -26,7 +26,6 @@ from arraycontext import flatten, unflatten from meshmode.dof_array import DOFArray -from sumpy.fmm import UnableToCollectTimingData from pytential.array_context import PyOpenCLArrayContext @@ -163,14 +162,7 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate, costs): raise NotImplementedError - def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate, - return_timing_data): - if return_timing_data: - from warnings import warn - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData) - + def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate): p2p = None kernel_args = evaluate_kernel_arguments( @@ -201,8 +193,7 @@ def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate, results.append((o.name, result)) - timing_data = {} - return results, timing_data + return results # }}} diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 627ef0e96..16d1f9d0d 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ -from typing import Optional +from typing import Any, Dict, Optional from pymbolic.mapper.evaluator import ( EvaluationMapper as PymbolicEvaluationMapper) @@ -333,28 +333,13 @@ def map_call(self, expr): class EvaluationMapper(EvaluationMapperBase): - def __init__(self, bound_expr, actx, context=None, - timing_data=None): - EvaluationMapperBase.__init__(self, bound_expr, actx, context) - self.timing_data = timing_data + def __init__(self, bound_expr, actx, context=None): + super().__init__(bound_expr, actx, context) def exec_compute_potential_insn( self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate): source = bound_expr.places.get_geometry(insn.source.geometry) - - return_timing_data = self.timing_data is not None - - result, timing_data = ( - source.exec_compute_potential_insn( - actx, insn, bound_expr, evaluate, return_timing_data)) - - if return_timing_data: - # The compiler ensures this. - assert insn not in self.timing_data - - self.timing_data[insn] = timing_data - - return result + return source.exec_compute_potential_insn(actx, insn, bound_expr, evaluate) # }}} @@ -408,17 +393,13 @@ def exec_compute_potential_insn( else: calibration_params = self.kernel_to_calibration_params[knls] - result, (cost_model_result, metadata) = \ - source.cost_model_compute_potential_insn( - actx, insn, bound_expr, evaluate, calibration_params, - self.per_box) + result = source.cost_model_compute_potential_insn( + actx, insn, bound_expr, evaluate, calibration_params, + self.per_box) # The compiler ensures this. assert insn not in self.modeled_cost - self.modeled_cost[insn] = cost_model_result - self.metadata[insn] = metadata - return result def get_modeled_cost(self): @@ -805,14 +786,12 @@ def scipy_op( return MatVecOp(self, actx, arg_name, dtype, total_dofs, discrs, starts_and_ends, extra_args) - def eval(self, context=None, timing_data=None, + def eval(self, + context: Optional[Dict[str, Any]] = None, array_context: Optional[PyOpenCLArrayContext] = None): """Evaluate the expression in *self*, using the input variables given in the dictionary *context*. - :arg timing_data: A dictionary into which timing - data will be inserted during evaluation. - (experimental) :arg array_context: only needs to be supplied if no instances of :class:`~meshmode.dof_array.DOFArray` with a :class:`~arraycontext.PyOpenCLArrayContext` @@ -844,8 +823,7 @@ def eval(self, context=None, timing_data=None, array_context = _find_array_context_from_args_in_context( context, array_context) - exec_mapper = EvaluationMapper( - self, array_context, context, timing_data=timing_data) + exec_mapper = EvaluationMapper(self, array_context, context) return execute(self.code, exec_mapper) def __call__(self, *args, **kwargs): diff --git a/pytential/unregularized.py b/pytential/unregularized.py index 2eef5fc09..3660f45db 100644 --- a/pytential/unregularized.py +++ b/pytential/unregularized.py @@ -97,15 +97,8 @@ def copy( density_discr=density_discr or self.density_discr, debug=debug if debug is not None else self.debug) - def exec_compute_potential_insn(self, actx: PyOpenCLArrayContext, - insn, bound_expr, evaluate, return_timing_data): - if return_timing_data: - from warnings import warn - from pytential.source import UnableToCollectTimingData - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData) - + def exec_compute_potential_insn(self, + actx: PyOpenCLArrayContext, insn, bound_expr, evaluate): from pytools.obj_array import obj_array_vectorize def evaluate_wrapper(expr): @@ -137,8 +130,8 @@ def preprocess_optemplate(self, name, discretizations, expr): from pytential.symbolic.mappers import UnregularizedPreprocessor return UnregularizedPreprocessor(name, discretizations)(expr) - def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext, - insn, bound_expr, evaluate): + def exec_compute_potential_insn_direct(self, + actx: PyOpenCLArrayContext, insn, bound_expr, evaluate): kernel_args = {} for arg_name, arg_expr in insn.kernel_arguments.items(): @@ -178,8 +171,7 @@ def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext, results.append((o.name, result)) - timing_data = {} - return results, timing_data + return results # {{{ fmm-based execution @@ -270,8 +262,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, # }}} from boxtree.fmm import drive_fmm - all_potentials_on_every_tgt = drive_fmm( - actx, wrangler, flat_strengths, timing_data=None) + all_potentials_on_every_tgt = drive_fmm(actx, wrangler, flat_strengths) # {{{ postprocess fmm @@ -294,8 +285,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, # }}} - timing_data = {} - return results, timing_data + return results # }}} diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 80a5693c9..c2aed190e 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -365,11 +365,7 @@ def test_timing_data_gathering(ctx_factory): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) op_S = bind(places, sym_op_S) - - timing_data = {} - op_S.eval({"sigma": sigma}, timing_data=timing_data, array_context=actx) - assert timing_data - logging.info(timing_data) + op_S.eval({"sigma": sigma}, array_context=actx) # }}} @@ -406,8 +402,6 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box): else: cost_S, _ = op_S.cost_per_stage("constant_one", sigma=sigma) - assert len(cost_S) == 1 - sym_op_S_plus_D = ( sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg")) @@ -422,8 +416,6 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box): "constant_one", sigma=sigma ) - assert len(cost_S_plus_D) == 2 - # }}} @@ -454,7 +446,6 @@ def test_cost_model_metadata_gathering(actx_factory): _, metadata = op_S.cost_per_stage( "constant_one", sigma=sigma, k=k, return_metadata=True ) - metadata, = metadata.values() geo_data = lpot_source.qbx_fmm_geometry_data( places, @@ -463,6 +454,9 @@ def test_cost_model_metadata_gathering(actx_factory): tree = geo_data.tree() + if not metadata: + return + assert metadata["p_qbx"] == QBX_ORDER assert metadata["nlevels"] == tree.nlevels assert metadata["nsources"] == tree.nsources @@ -519,10 +513,9 @@ def reorder_potentials(self, potentials): def form_global_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs local_exps = self.qbx_local_expansion_zeros() - ops = 0 if self.using_tsqbx: - return local_exps, self.timing_future(ops) + return local_exps global_qbx_centers = self.geo_data.global_qbx_centers() qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() @@ -536,16 +529,14 @@ def form_global_qbx_locals(self, actx, src_weight_vecs): src_sum = 0 for src_ibox in self.trav.neighbor_source_boxes_lists[start:end]: src_pslice = self._get_source_slice(src_ibox) - ops += src_pslice.stop - src_pslice.start src_sum += np.sum(src_weights[src_pslice]) local_exps[tgt_icenter] = src_sum - return local_exps, self.timing_future(ops) + return local_exps def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): local_exps = self.qbx_local_expansion_zeros() - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() @@ -565,13 +556,11 @@ def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): for src_ibox in ssn.lists[start:stop]: local_exps[tgt_icenter] += multipole_exps[src_ibox] - ops += 1 - return local_exps, self.timing_future(ops) + return local_exps def translate_box_local_to_qbx_local(self, actx, local_exps): qbx_expansions = self.qbx_local_expansion_zeros() - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() @@ -580,13 +569,11 @@ def translate_box_local_to_qbx_local(self, actx, local_exps): isrc_box = qbx_center_to_target_box[tgt_icenter] src_ibox = self.trav.target_boxes[isrc_box] qbx_expansions[tgt_icenter] += local_exps[src_ibox] - ops += 1 - return qbx_expansions, self.timing_future(ops) + return qbx_expansions def eval_qbx_expansions(self, actx, qbx_expansions): output = self.full_output_zeros(qbx_expansions) - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() center_to_tree_targets = self.geo_data.center_to_tree_targets() @@ -597,17 +584,15 @@ def eval_qbx_expansions(self, actx, qbx_expansions): for icenter_tgt in range(start, end): center_itgt = center_to_tree_targets.lists[icenter_tgt] output[0][center_itgt] += qbx_expansions[src_icenter] - ops += 1 - return output, self.timing_future(ops) + return output def eval_target_specific_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs pot = self.full_output_zeros(src_weights) - ops = 0 if not self.using_tsqbx: - return pot, self.timing_future(ops) + return pot global_qbx_centers = self.geo_data.global_qbx_centers() center_to_tree_targets = self.geo_data.center_to_tree_targets() @@ -648,9 +633,7 @@ def eval_target_specific_qbx_locals(self, actx, src_weight_vecs): ctr_itgt = center_to_tree_targets.lists[ictr_tgt] pot[0][ctr_itgt] = src_sum - ops += (ictr_tgt_end - ictr_tgt_start) * nsrcs - - return pot, self.timing_future(ops) + return pot # }}} @@ -738,7 +721,6 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, sigma = get_density(actx, density_discr) modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma) - modeled_time, = modeled_time.values() # Run FMM with ConstantOneWrangler. This can't be done with pytential's # high-level interface, so call the FMM driver directly. @@ -758,12 +740,17 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, timing_data = {} potential = drive_fmm( - actx, wrangler, (src_weights,), timing_data, + actx, wrangler, (src_weights,), traversal=wrangler.trav)[0][geo_data.ncenters:] # Check constant one wrangler for correctness. assert np.all(potential == ndofs) + if not timing_data: + return + + modeled_time, = modeled_time.values() + # Check that the cost model matches the timing data returned by the # constant one wrangler. mismatches = [] @@ -829,15 +816,12 @@ def level_to_order_constant(kernel, kernel_args, tree, level): cost_constant, metadata = bind(places, sym_op).cost_per_stage( "constant_one", sigma=sigma) - cost_constant, = cost_constant.values() - metadata, = metadata.values() - # }}} # {{{ varying level to order def level_to_order_varying(kernel, kernel_args, tree, level): - return metadata["nlevels"] - level + return tree.nlevels - level lpot_source = get_lpot_source(actx, 2).copy( cost_model=QBXCostModel(), @@ -851,10 +835,15 @@ def level_to_order_varying(kernel, kernel_args, tree, level): cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage( "constant_one", sigma=sigma) - cost_varying, = cost_varying.values() - # }}} + if not metadata: + return + + cost_constant, = cost_constant.values() + metadata, = metadata.values() + cost_varying, = cost_varying.values() + assert sum(cost_varying.values()) > sum(cost_constant.values()) # }}}