diff --git a/doc/conf.py b/doc/conf.py index b8922b280..9f58c1e5c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,6 +18,10 @@ "DOFDescriptorLike": "pytential.symbolic.dof_desc.DOFDescriptorLike", } +nitpick_ignore_regex = [ + ["py:class", r"boxtree\.array_context\.(.+)"], + ] + intersphinx_mapping = { "arraycontext": ("https://documen.tician.de/arraycontext", None), "boxtree": ("https://documen.tician.de/boxtree", None), diff --git a/doc/qbx.rst b/doc/qbx.rst index e3b9e95d0..1219a8f84 100644 --- a/doc/qbx.rst +++ b/doc/qbx.rst @@ -8,6 +8,11 @@ QBX internals This page documents :mod:`pytential` internals and is not typically needed in end-user applications. +Array Context +------------- + +.. automodule:: pytential.array_context + Refinement ---------- diff --git a/examples/helmholtz-dirichlet.py b/examples/helmholtz-dirichlet.py index b046a6935..f9283116f 100644 --- a/examples/helmholtz-dirichlet.py +++ b/examples/helmholtz-dirichlet.py @@ -1,13 +1,13 @@ import numpy as np import numpy.linalg as la -from meshmode.array_context import PyOpenCLArrayContext from meshmode.discretization import Discretization from meshmode.discretization.poly_element import ( InterpolatoryQuadratureSimplexGroupFactory, ) from pytential import bind, sym +from pytential.array_context import PyOpenCLArrayContext from pytential.target import PointsTarget diff --git a/examples/laplace-dirichlet-3d.py b/examples/laplace-dirichlet-3d.py index b49d69e1a..5b3358eba 100644 --- a/examples/laplace-dirichlet-3d.py +++ b/examples/laplace-dirichlet-3d.py @@ -1,12 +1,12 @@ import numpy as np -from meshmode.array_context import PyOpenCLArrayContext from meshmode.discretization import Discretization from meshmode.discretization.poly_element import ( InterpolatoryQuadratureSimplexGroupFactory, ) from pytential import bind, sym +from pytential.array_context import PyOpenCLArrayContext from pytential.target import PointsTarget diff --git a/examples/laplace-dirichlet-simple.py b/examples/laplace-dirichlet-simple.py index d6d5858bd..572f6498d 100644 --- a/examples/laplace-dirichlet-simple.py +++ b/examples/laplace-dirichlet-simple.py @@ -1,12 +1,12 @@ import numpy as np -from meshmode.array_context import PyOpenCLArrayContext from meshmode.discretization import Discretization from meshmode.discretization.poly_element import ( InterpolatoryQuadratureSimplexGroupFactory, ) from pytential import bind, sym +from pytential.array_context import PyOpenCLArrayContext from pytential.target import PointsTarget diff --git a/examples/scaling-study.py b/examples/scaling-study.py index f597d939e..e2f5eaa11 100644 --- a/examples/scaling-study.py +++ b/examples/scaling-study.py @@ -1,12 +1,12 @@ import numpy as np -from meshmode.array_context import PyOpenCLArrayContext from meshmode.discretization import Discretization from meshmode.discretization.poly_element import ( InterpolatoryQuadratureSimplexGroupFactory, ) from pytential import bind, sym +from pytential.array_context import PyOpenCLArrayContext from pytential.target import PointsTarget diff --git a/examples/cost.py b/experiments/cost.py similarity index 96% rename from examples/cost.py rename to experiments/cost.py index 27f568f5a..14b3cb5f4 100644 --- a/examples/cost.py +++ b/experiments/cost.py @@ -138,8 +138,7 @@ def calibrate_cost_model(ctx): for _ in range(RUNS): timing_data = {} - bound_op.eval({"sigma": sigma}, array_context=actx, - timing_data=timing_data) + bound_op.eval({"sigma": sigma}, array_context=actx) model_results.append(modeled_cost) timing_results.append(timing_data) @@ -175,8 +174,7 @@ def test_cost_model(ctx, calibration_params): temp_timing_results = [] for _ in range(RUNS): timing_data = {} - bound_op.eval({"sigma": sigma}, - array_context=actx, timing_data=timing_data) + bound_op.eval({"sigma": sigma}, array_context=actx) temp_timing_results.append(one(timing_data.values())) timing_result = {} diff --git a/pytential/array_context.py b/pytential/array_context.py new file mode 100644 index 000000000..25b3d2ac5 --- /dev/null +++ b/pytential/array_context.py @@ -0,0 +1,92 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from arraycontext.pytest import ( + _PytestPyOpenCLArrayContextFactoryWithClass, + register_pytest_array_context_factory, +) +from sumpy.array_context import ( # noqa: F401 + PyOpenCLArrayContext as SumpyPyOpenCLArrayContext, + make_loopy_program, +) + + +__doc__ = """ +.. autoclass:: PyOpenCLArrayContext +""" + + +# {{{ PyOpenCLArrayContext + +class PyOpenCLArrayContext(SumpyPyOpenCLArrayContext): + def transform_loopy_program(self, t_unit): + kernel = t_unit.default_entrypoint + options = kernel.options + + if not options.return_dict or not options.no_numpy: + raise ValueError( + "loopy kernels passed to 'call_loopy' must have 'return_dict' " + "and 'no_numpy' options set. Did you use 'make_loopy_program' " + f"to create the kernel '{kernel.name}'?") + + # FIXME: this probably needs some proper logic + from meshmode.array_context import _transform_loopy_inner + transformed_t_unit = _transform_loopy_inner(t_unit) + + if transformed_t_unit is not None: + return transformed_t_unit + + return t_unit + +# }}} + + +# {{{ pytest + +def _acf(): + import pyopencl as cl + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + + return PyOpenCLArrayContext(queue, force_device_scalars=True) + + +class PytestPyOpenCLArrayContextFactory( + _PytestPyOpenCLArrayContextFactoryWithClass): + actx_class = PyOpenCLArrayContext + + def __call__(self): + # NOTE: prevent any cache explosions during testing! + from sympy.core.cache import clear_cache + clear_cache() + + return super().__call__() + + +register_pytest_array_context_factory( + "pytential.pyopencl", + PytestPyOpenCLArrayContextFactory) + +# }}} diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 964698a8e..86c0adc65 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -120,7 +120,7 @@ def partition_by_nodes( from pytential.qbx.utils import tree_code_container tcc = tree_code_container(lpot_source._setup_actx) - tree, _ = tcc.build_tree()(actx.queue, + tree, _ = tcc.build_tree()(actx, particles=flatten( actx.thaw(discr.nodes()), actx, leaf_class=DOFArray ), @@ -128,8 +128,9 @@ def partition_by_nodes( kind=tree_kind) from boxtree import box_flags_enum - tree = tree.get(actx.queue) - # FIXME: maybe this should use IS_LEAF once available? + + tree = actx.to_numpy(tree) + # FIXME maybe this should use IS_LEAF once available? assert tree.box_flags is not None leaf_boxes, = ( tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0 @@ -766,12 +767,11 @@ def prg() -> lp.ExecutorBase: assert isinstance(setup_actx, PyOpenCLArrayContext) tcc = tree_code_container(setup_actx) - tree, _ = tcc.build_tree()(actx.queue, targets, - max_particles_in_box=max_particles_in_box) - query, _ = tcc.build_area_query()(actx.queue, tree, pxy.centers, pxy.radii) + tree, _ = tcc.build_tree()(actx, targets, max_particles_in_box=max_particles_in_box) + query, _ = tcc.build_area_query()(actx, tree, pxy.centers, pxy.radii) - tree = tree.get(actx.queue) - query = query.get(actx.queue) + tree = actx.to_numpy(tree) + query = actx.to_numpy(query) # }}} diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 836a3168d..68a789464 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -39,7 +39,7 @@ ) from meshmode.discretization import Discretization from meshmode.dof_array import DOFArray -from pytools import memoize_in, memoize_method, obj_array, single_valued +from pytools import memoize_in, memoize_method, single_valued from sumpy.expansion import ( DefaultExpansionFactory as DefaultExpansionFactoryBase, ExpansionFactoryBase, @@ -497,25 +497,20 @@ def exec_compute_potential_insn(self, insn: ComputePotential, bound_expr: BoundExpression[Operand], evaluate: Callable[[ArithmeticExpression], ArrayOrArithContainer], - return_timing_data: bool ): extra_args = {} if self.fmm_level_to_order is False: func = self.exec_compute_potential_insn_direct - extra_args["return_timing_data"] = return_timing_data else: func = self.exec_compute_potential_insn_fmm - def drive_fmm(wrangler, strengths, geo_data, kernel, kernel_arguments): + def drive_fmm( + actx, wrangler, strengths, geo_data, kernel, kernel_arguments): del geo_data, kernel, kernel_arguments from pytential.qbx.fmm import drive_fmm - if return_timing_data: - timing_data = {} - else: - timing_data = None - return drive_fmm(wrangler, strengths, timing_data), timing_data + return drive_fmm(actx, wrangler, strengths) extra_args["fmm_driver"] = drive_fmm @@ -531,11 +526,11 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate, :arg calibration_params: a :class:`dict` of calibration parameters, mapping from parameter names to calibration values. - :arg per_box: if *true*, cost model result will be a :class:`numpy.ndarray` - or :class:`pyopencl.array.Array` with shape of the number of boxes, where - the ith entry is the sum of the cost of all stages for box i. If *false*, - cost model result will be a :class:`dict`, mapping from the stage name to - predicted cost of the stage for all boxes. + :arg per_box: if *True*, cost model result will be an array with shape + of the number of boxes, where the ith entry is the sum of the cost + of all stages for box i. If *False*, cost model result will be a + :class:`dict`, mapping from the stage name to predicted cost of the + stage for all boxes. :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns. """ @@ -543,27 +538,16 @@ def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate, raise NotImplementedError("perf modeling direct evaluations") def drive_cost_model( - wrangler, strengths, geo_data, kernel, kernel_arguments): - - if per_box: - cost_model_result, metadata = self.cost_model.qbx_cost_per_box( - actx.queue, geo_data, kernel, kernel_arguments, - calibration_params - ) - else: - cost_model_result, metadata = self.cost_model.qbx_cost_per_stage( - actx.queue, geo_data, kernel, kernel_arguments, - calibration_params - ) - + actx, wrangler, strengths, geo_data, kernel, kernel_arguments): from functools import partial + from pytools import obj_array + return ( obj_array.vectorize( - partial(wrangler.finalize_potentials, - template_ary=strengths[0]), - wrangler.full_output_zeros(strengths[0])), - (cost_model_result, metadata)) + partial(wrangler.finalize_potentials, actx), + wrangler.full_output_zeros(actx)) + ) return self._dispatch_compute_potential_insn( actx, insn, bound_expr, evaluate, @@ -609,7 +593,7 @@ def _tree_indep_data_for_wrangler(self, source_kernels, target_kernels): if self.fmm_backend == "sumpy": from pytential.qbx.fmm import QBXSumpyTreeIndependentDataForWrangler return QBXSumpyTreeIndependentDataForWrangler( - self.cl_context, + self._setup_actx, fmm_mpole_factory, fmm_local_factory, qbx_local_factory, target_kernels=target_kernels, source_kernels=source_kernels) @@ -621,7 +605,7 @@ def _tree_indep_data_for_wrangler(self, source_kernels, target_kernels): ] from pytential.qbx.fmmlib import QBXFMMLibTreeIndependentDataForWrangler return QBXFMMLibTreeIndependentDataForWrangler( - self.cl_context, + self._setup_actx, multipole_expansion_factory=fmm_mpole_factory, local_expansion_factory=fmm_local_factory, qbx_local_expansion_factory=qbx_local_factory, @@ -673,11 +657,8 @@ def exec_compute_potential_insn_fmm(self, """ :arg fmm_driver: A function that accepts four arguments: *wrangler*, *strength*, *geo_data*, *kernel*, *kernel_arguments* - :returns: a tuple ``(assignments, extra_outputs)``, where *assignments* - is a list of tuples containing pairs ``(name, value)`` representing - assignments to be performed in the evaluation context. - *extra_outputs* is data that *fmm_driver* may return - (such as timing data), passed through unmodified. + :returns: a list of assignments containing pairs ``(name, value)`` + representing assignments to be performed in the evaluation context. """ target_name_and_side_to_number, target_discrs_and_qbx_sides = ( self.get_target_discrs_and_qbx_sides(insn, bound_expr)) @@ -741,9 +722,9 @@ def exec_compute_potential_insn_fmm(self, # }}} # Execute global QBX. - all_potentials_on_every_target, extra_outputs = ( + all_potentials_on_every_target = ( fmm_driver( - wrangler, flat_strengths, geo_data, + actx, wrangler, flat_strengths, geo_data, base_kernel, kernel_extra_kwargs)) results = [] @@ -764,7 +745,7 @@ def exec_compute_potential_insn_fmm(self, results.append((o.name, result)) - return results, extra_outputs + return results # }}} @@ -795,7 +776,7 @@ def get_lpot_applier(self, target_kernels, source_kernels): base_kernel = single_valued(knl.get_base_kernel() for knl in source_kernels) from sumpy.qbx import LayerPotential - return LayerPotential(self.cl_context, + return LayerPotential( expansion=self.get_expansion_for_qbx_direct_eval( base_kernel, target_kernels), target_kernels=target_kernels, source_kernels=source_kernels, @@ -816,7 +797,6 @@ def get_lpot_applier_on_tgt_subset(self, target_kernels, source_kernels): from pytential.qbx.direct import LayerPotentialOnTargetAndCenterSubset return LayerPotentialOnTargetAndCenterSubset( - self.cl_context, expansion=VolumeTaylorLocalExpansion(base_kernel, self.qbx_order), target_kernels=target_kernels, source_kernels=source_kernels, value_dtypes=value_dtype) @@ -826,7 +806,7 @@ def get_qbx_target_numberer(self, dtype): assert dtype == np.int32 from pyopencl.scan import GenericScanKernel return GenericScanKernel( - self.cl_context, np.int32, + self._setup_actx.context, np.int32, arguments="int *tgt_to_qbx_center, int *qbx_tgt_number, int *count", input_expr="tgt_to_qbx_center[i] >= 0 ? 1 : 0", scan_expr="a+b", neutral="0", @@ -838,21 +818,11 @@ def get_qbx_target_numberer(self, dtype): *count = item; """) - def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate, - return_timing_data): + def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate): from meshmode.discretization import Discretization from pytential import bind, sym - if return_timing_data: - from warnings import warn - - from pytential.source import UnableToCollectTimingData - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData, - stacklevel=2) - # {{{ evaluate and flatten inputs @memoize_in(bound_expr.places, @@ -920,7 +890,6 @@ def _flat_centers(dofdesc, qbx_forced_limit): other_outputs[o.target_name, qbx_forced_limit].append((i, o)) - queue = actx.queue results = [None] * len(insn.outputs) # }}} @@ -937,7 +906,7 @@ def _flat_centers(dofdesc, qbx_forced_limit): target_name.geometry, target_name.discr_stage) flat_target_nodes = _flat_nodes(target_name) - _, output_for_each_kernel = lpot_applier(queue, + output_for_each_kernel = lpot_applier(actx, targets=flat_target_nodes, sources=flat_source_nodes, centers=_flat_centers(target_name, qbx_forced_limit), @@ -961,6 +930,8 @@ def _flat_centers(dofdesc, qbx_forced_limit): p2p = self.get_p2p(actx, insn.target_kernels, insn.source_kernels) lpot_applier_on_tgt_subset = self.get_lpot_applier_on_tgt_subset( insn.target_kernels, insn.source_kernels) + else: + p2p = lpot_applier_on_tgt_subset = None for (target_name, qbx_forced_limit), outputs in other_outputs.items(): target_discr = bound_expr.places.get_discretization( @@ -968,12 +939,12 @@ def _flat_centers(dofdesc, qbx_forced_limit): flat_target_nodes = _flat_nodes(target_name) # FIXME: (Somewhat wastefully) compute P2P for all targets - _, output_for_each_kernel = p2p( - queue, - targets=flat_target_nodes, - sources=flat_source_nodes, - strength=flat_strengths, - **flat_kernel_args) + assert p2p is not None + output_for_each_kernel = p2p(actx, + targets=flat_target_nodes, + sources=flat_source_nodes, + strength=flat_strengths, + **flat_kernel_args) target_discrs_and_qbx_sides = ((target_discr, qbx_forced_limit),) geo_data = self.qbx_fmm_geometry_data( @@ -995,7 +966,7 @@ def _flat_centers(dofdesc, qbx_forced_limit): qbx_tgt_numberer( tgt_to_qbx_center, qbx_tgt_numbers, qbx_tgt_count, - queue=queue) + queue=actx.queue) qbx_tgt_count = int(actx.to_numpy(qbx_tgt_count).item()) if (abs(qbx_forced_limit) == 1 and qbx_tgt_count < target_discr.ndofs): @@ -1011,8 +982,9 @@ def _flat_centers(dofdesc, qbx_forced_limit): tgt_subset_kwargs[f"result_{i}"] = res_i if qbx_tgt_count: + assert lpot_applier_on_tgt_subset is not None lpot_applier_on_tgt_subset( - queue, + actx, targets=flat_target_nodes, sources=flat_source_nodes, centers=geo_data.flat_centers(), @@ -1032,8 +1004,7 @@ def _flat_centers(dofdesc, qbx_forced_limit): # }}} - timing_data = {} - return results, timing_data + return results # }}} diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 62ed38dde..fdacbe7d3 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -27,33 +27,29 @@ THE SOFTWARE. """ +import logging from abc import abstractmethod from functools import partial +from typing import TYPE_CHECKING import numpy as np from mako.template import Template -import pyopencl as cl -import pyopencl.array +import pymbolic.primitives as prim from boxtree.cost import ( AbstractFMMCostModel as BaseAbstractFMMCostModel, FMMCostModel, FMMTranslationCostModel, _PythonFMMCostModel, ) -from pymbolic import evaluate, var -from pyopencl.array import take -from pyopencl.elementwise import ElementwiseKernel -from pyopencl.tools import dtype_to_ctype -from pytools import memoize_method - +from pytools import memoize_in -Template = partial(Template, strict_undefined=True) - -import logging +if TYPE_CHECKING: + from pytential.array_context import PyOpenCLArrayContext logger = logging.getLogger(__name__) +Template = partial(Template, strict_undefined=True) __doc__ = """ @@ -130,23 +126,23 @@ def __init__(self, ncoeffs_qbx, ncoeffs_fmm_by_level, uses_point_and_shoot): ) def p2qbxl(self): - return var("c_p2qbxl") * self.ncoeffs_qbx + return prim.var("c_p2qbxl") * self.ncoeffs_qbx def p2p_tsqbx(self): # This term should be linear in the QBX order, which is the # square root of the number of QBX coefficients. - return var("c_p2p_tsqbx") * self.ncoeffs_qbx ** (1 / 2) + return prim.var("c_p2p_tsqbx") * self.ncoeffs_qbx ** (1 / 2) def qbxl2p(self): - return var("c_qbxl2p") * self.ncoeffs_qbx + return prim.var("c_qbxl2p") * self.ncoeffs_qbx def m2qbxl(self, level): - return var("c_m2qbxl") * self.e2e_cost( + return prim.var("c_m2qbxl") * self.e2e_cost( self.ncoeffs_fmm_by_level[level], self.ncoeffs_qbx) def l2qbxl(self, level): - return var("c_l2qbxl") * self.e2e_cost( + return prim.var("c_l2qbxl") * self.e2e_cost( self.ncoeffs_fmm_by_level[level], self.ncoeffs_qbx) @@ -159,8 +155,8 @@ def make_pde_aware_translation_cost_model(dim, nlevels): """Create a cost model for FMM translation operators that make use of the knowledge that the potential satisfies a PDE. """ - p_qbx = var("p_qbx") - p_fmm = np.array([var(f"p_fmm_lev{i}") for i in range(nlevels)]) + p_qbx = prim.var("p_qbx") + p_fmm = np.array([prim.var(f"p_fmm_lev{i}") for i in range(nlevels)]) uses_point_and_shoot = False @@ -180,8 +176,8 @@ def make_taylor_translation_cost_model(dim, nlevels): """Create a cost model for FMM translation based on Taylor expansions in Cartesian coordinates. """ - p_qbx = var("p_qbx") - p_fmm = np.array([var(f"p_fmm_lev{i}") for i in range(nlevels)]) + p_qbx = prim.var("p_qbx") + p_fmm = np.array([prim.var(f"p_fmm_lev{i}") for i in range(nlevels)]) ncoeffs_fmm = (p_fmm + 1) ** dim ncoeffs_qbx = (p_qbx + 1) ** dim @@ -212,85 +208,80 @@ class AbstractQBXCostModel(BaseAbstractFMMCostModel): """ @abstractmethod - def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, + def process_form_qbxl(self, actx: PyOpenCLArrayContext, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): """ - :arg queue: a :class:`pyopencl.CommandQueue` object. :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg p2qbxl_cost: a :class:`numpy.float64` constant representing the cost of adding a source to a QBX local expansion. - :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or - :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)``, with the - *i*th entry representing the number of direct evaluation sources (list 1, - list 3 close and list 4 close) for ``target_boxes[i]``. - :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - ``(ntarget_boxes,)``, with the *i*th entry representing the cost of - adding all direct evaluation sources to QBX local expansions of centers - in ``target_boxes[i]``. + :arg ndirect_sources_per_target_box: an array of shape ``(ntarget_boxes,)``, + with the *i*th entry representing the number of direct evaluation + sources (list 1, list 3 close and list 4 close) for ``target_boxes[i]``. + :return: an array of shape ``(ntarget_boxes,)``, with the *i*th entry + representing the cost of adding all direct evaluation sources to + QBX local expansions of centers in ``target_boxes[i]``. """ pass @abstractmethod - def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): + def process_m2qbxl(self, actx: PyOpenCLArrayContext, geo_data, m2qbxl_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. - :arg m2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape ``(nlevels,)`` where the *i*th entry represents the translation - cost from multipole expansion at level *i* to a QBX center. - :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - ``(ntarget_boxes,)``, with the *i*th entry representing the cost of - translating multipole expansions of list 3 boxes at all source levels to - all QBX centers in ``target_boxes[i]``. + :arg m2qbxl_cost: an array of shape ``(nlevels,)`` where the *i*th entry + represents the translation cost from multipole expansion at level + *i* to a QBX center. + :return: an array of shape ``(ntarget_boxes,)``, with the *i*th entry + representing the cost of translating multipole expansions of list + 3 boxes at all source levels to all QBX centers in ``target_boxes[i]``. """ pass @abstractmethod - def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): + def process_l2qbxl(self, actx: PyOpenCLArrayContext, geo_data, l2qbxl_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. - :arg l2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape ``(nlevels,)`` where each entry represents the translation - cost from a box local expansion to a QBX local expansion. - :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - ``(ntarget_boxes,)``, with each entry representing the cost of - translating box local expansions to all QBX local expansions. + :arg l2qbxl_cost: an array of shape ``(nlevels,)`` where each entry + represents the translation cost from a box local expansion to a QBX + local expansion. + :return: an array of shape ``(ntarget_boxes,)``, with each entry + representing the cost of translating box local expansions to all + QBX local expansions. """ pass @abstractmethod - def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, actx: PyOpenCLArrayContext, geo_data, qbxl2p_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg qbxl2p_cost: a :class:`numpy.float64` constant, representing the evaluation cost of a target from its QBX local expansion. - :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - ``(ntarget_boxes,)``, with the *i*th entry representing the cost of - evaluating all targets associated with QBX centers in ``target_boxes[i]`` - from QBX local expansions. + :return: an array of shape ``(ntarget_boxes,)``, with the *i*th entry + representing the cost of evaluating all targets associated with QBX + centers in ``target_boxes[i]`` from QBX local expansions. """ pass @abstractmethod - def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, actx: PyOpenCLArrayContext, + geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg p2p_tsqbx_cost: a :class:`numpy.float64` constant representing the evaluation cost of a target from a direct evaluation source of the target box containing the expansion center. - :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or - :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)``, with the - *i*th entry representing the number of direct evaluation sources + :arg ndirect_sources_per_target_box: an array of shape ``(ntarget_boxes,)``, + with the *i*th entry representing the number of direct evaluation sources (list 1, list 3 close and list 4 close) for ``target_boxes[i]``. - :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - ``(ntarget_boxes,)``, with the *i*th entry representing the evaluation - cost of all targets associated with centers in ``target_boxes[i]`` from - the direct evaluation sources of ``target_boxes[i]``. + :return: an array of shape ``(ntarget_boxes,)``, with the *i*th entry + representing the evaluation cost of all targets associated with + centers in ``target_boxes[i]`` from the direct evaluation sources of + ``target_boxes[i]``. """ pass def qbx_cost_factors_for_kernels_from_model( - self, queue, nlevels, xlat_cost, context): + self, actx: PyOpenCLArrayContext, nlevels, xlat_cost, context): """Evaluate translation cost factors from symbolic model. The result of this function can be used for process_* methods in this class. @@ -298,8 +289,6 @@ def qbx_cost_factors_for_kernels_from_model( :class:`boxtree.cost.AbstractFMMCostModel` to support operations specific to QBX. - :arg queue: If not None, the cost factor arrays will be transferred to device - using this queue. :arg nlevels: the number of tree levels. :arg xlat_cost: a :class:`QBXTranslationCostModel`. :arg context: a :class:`dict` mapping from the symbolic names of parameters @@ -309,25 +298,25 @@ def qbx_cost_factors_for_kernels_from_model( of those stages in FMM and QBX. """ cost_factors = self.fmm_cost_factors_for_kernels_from_model( - queue, nlevels, xlat_cost, context + actx, nlevels, xlat_cost, context ) cost_factors.update({ - "p2qbxl_cost": evaluate(xlat_cost.p2qbxl(), context=context), + "p2qbxl_cost": prim.evaluate(xlat_cost.p2qbxl(), context=context), "m2qbxl_cost": np.array([ - evaluate(xlat_cost.m2qbxl(ilevel), context=context) + prim.evaluate(xlat_cost.m2qbxl(ilevel), context=context) for ilevel in range(nlevels) ]), "l2qbxl_cost": np.array([ - evaluate(xlat_cost.l2qbxl(ilevel), context=context) + prim.evaluate(xlat_cost.l2qbxl(ilevel), context=context) for ilevel in range(nlevels) ]), - "qbxl2p_cost": evaluate(xlat_cost.qbxl2p(), context=context), - "p2p_tsqbx_cost": evaluate(xlat_cost.p2p_tsqbx(), context=context) + "qbxl2p_cost": prim.evaluate(xlat_cost.qbxl2p(), context=context), + "p2p_tsqbx_cost": prim.evaluate(xlat_cost.p2p_tsqbx(), context=context) }) - if queue: - cost_factors = self.cost_factors_to_dev(cost_factors, queue) + if actx: + cost_factors = self.cost_factors_to_dev(cost_factors, actx) return cost_factors @@ -349,7 +338,8 @@ def gather_metadata(geo_data, fmm_level_to_order): return metadata - def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, + def qbx_cost_per_box(self, actx: PyOpenCLArrayContext, + geo_data, kernel, kernel_arguments, calibration_params): # FIXME: This should support target filtering. lpot_source = geo_data.lpot_source @@ -384,15 +374,15 @@ def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, ) translation_cost = self.qbx_cost_factors_for_kernels_from_model( - queue, tree.nlevels, xlat_cost, params + actx, tree.nlevels, xlat_cost, params ) ndirect_sources_per_target_box = \ - self.get_ndirect_sources_per_target_box(queue, traversal) + self.get_ndirect_sources_per_target_box(actx, traversal) # get FMM cost per box from parent class result = self.cost_per_box( - queue, traversal, fmm_level_to_order, + actx, traversal, fmm_level_to_order, calibration_params, ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild @@ -400,32 +390,32 @@ def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, if use_tsqbx: result[target_boxes] += self.process_eval_target_specific_qbxl( - queue, geo_data, translation_cost["p2p_tsqbx_cost"], + actx, geo_data, translation_cost["p2p_tsqbx_cost"], ndirect_sources_per_target_box ) else: result[target_boxes] += self.process_form_qbxl( - queue, geo_data, translation_cost["p2qbxl_cost"], + actx, geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) result[target_boxes] += self.process_m2qbxl( - queue, geo_data, translation_cost["m2qbxl_cost"] + actx, geo_data, translation_cost["m2qbxl_cost"] ) result[target_boxes] += self.process_l2qbxl( - queue, geo_data, translation_cost["l2qbxl_cost"] + actx, geo_data, translation_cost["l2qbxl_cost"] ) result[target_boxes] += self.process_eval_qbxl( - queue, geo_data, translation_cost["qbxl2p_cost"] + actx, geo_data, translation_cost["qbxl2p_cost"] ) metadata = self.gather_metadata(geo_data, fmm_level_to_order) return result, metadata - def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, + def qbx_cost_per_stage(self, actx, geo_data, kernel, kernel_arguments, calibration_params): # FIXME: This should support target filtering. lpot_source = geo_data.lpot_source @@ -459,15 +449,15 @@ def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, ) translation_cost = self.qbx_cost_factors_for_kernels_from_model( - queue, tree.nlevels, xlat_cost, params + actx, tree.nlevels, xlat_cost, params ) ndirect_sources_per_target_box = \ - self.get_ndirect_sources_per_target_box(queue, traversal) + self.get_ndirect_sources_per_target_box(actx, traversal) # get FMM per-stage cost from parent class result = self.cost_per_stage( - queue, traversal, fmm_level_to_order, + actx, traversal, fmm_level_to_order, calibration_params, ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild @@ -475,29 +465,34 @@ def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, if use_tsqbx: result["eval_target_specific_qbx_locals"] = self.aggregate_over_boxes( + actx, self.process_eval_target_specific_qbxl( - queue, geo_data, translation_cost["p2p_tsqbx_cost"], + actx, geo_data, translation_cost["p2p_tsqbx_cost"], ndirect_sources_per_target_box=ndirect_sources_per_target_box ) ) else: result["form_global_qbx_locals"] = self.aggregate_over_boxes( + actx, self.process_form_qbxl( - queue, geo_data, translation_cost["p2qbxl_cost"], + actx, geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) ) result["translate_box_multipoles_to_qbx_local"] = self.aggregate_over_boxes( - self.process_m2qbxl(queue, geo_data, translation_cost["m2qbxl_cost"]) + actx, + self.process_m2qbxl(actx, geo_data, translation_cost["m2qbxl_cost"]) ) result["translate_box_local_to_qbx_local"] = self.aggregate_over_boxes( - self.process_l2qbxl(queue, geo_data, translation_cost["l2qbxl_cost"]) + actx, + self.process_l2qbxl(actx, geo_data, translation_cost["l2qbxl_cost"]) ) result["eval_qbx_expansions"] = self.aggregate_over_boxes( - self.process_eval_qbxl(queue, geo_data, translation_cost["qbxl2p_cost"]) + actx, + self.process_eval_qbxl(actx, geo_data, translation_cost["qbxl2p_cost"]) ) metadata = self.gather_metadata(geo_data, fmm_level_to_order) @@ -585,9 +580,7 @@ def estimate_kernel_specific_calibration_params( class QBXCostModel(AbstractQBXCostModel, FMMCostModel): - """This class is an implementation of interface :class:`AbstractQBXCostModel` - using :mod:`pyopencl`. - """ + """An implementation of interface :class:`AbstractQBXCostModel`.""" def __init__( self, translation_cost_model_factory=make_pde_aware_translation_cost_model): @@ -598,73 +591,91 @@ def __init__( """ FMMCostModel.__init__(self, translation_cost_model_factory) - @memoize_method - def _fill_array_with_index_knl(self, context, idx_dtype, array_dtype): - return ElementwiseKernel( - context, - Template(r""" - ${idx_t} *index, - ${array_t} *array, - ${array_t} val - """).render( - idx_t=dtype_to_ctype(idx_dtype), - array_t=dtype_to_ctype(array_dtype) - ), - Template(r""" - array[index[i]] = val; - """).render(), - name="fill_array_with_index" - ) + def _fill_array_with_index_knl(self, + actx: PyOpenCLArrayContext, idx_dtype, array_dtype): + from pyopencl.elementwise import ElementwiseKernel + from pyopencl.tools import dtype_to_ctype + + @memoize_in(actx, ( + QBXCostModel._fill_array_with_index_knl, + idx_dtype, array_dtype)) + def get_kernel(): + return ElementwiseKernel( + actx.context, + Template(r""" + ${idx_t} *index, + ${array_t} *array, + ${array_t} val + """).render( + idx_t=dtype_to_ctype(idx_dtype), + array_t=dtype_to_ctype(array_dtype) + ), + Template(r""" + array[index[i]] = val; + """).render(), + name="fill_array_with_index" + ) + + return get_kernel() - def _fill_array_with_index(self, queue, array, index, value): + def _fill_array_with_index(self, actx, array, index, value): idx_dtype = index.dtype array_dtype = array.dtype - knl = self._fill_array_with_index_knl(queue.context, idx_dtype, array_dtype) - knl(index, array, value, queue=queue) - - @memoize_method - def count_global_qbx_centers_knl(self, context, box_id_dtype, particle_id_dtype): - return ElementwiseKernel( - context, - Template(r""" - ${particle_id_t} *nqbx_centers_itgt_box, - ${particle_id_t} *global_qbx_center_weight, - ${box_id_t} *target_boxes, - ${particle_id_t} *box_target_starts, - ${particle_id_t} *box_target_counts_nonchild - """).render( - box_id_t=dtype_to_ctype(box_id_dtype), - particle_id_t=dtype_to_ctype(particle_id_dtype) - ), - Template(r""" - ${box_id_t} global_box_id = target_boxes[i]; - ${particle_id_t} start = box_target_starts[global_box_id]; - ${particle_id_t} end = start + box_target_counts_nonchild[ - global_box_id - ]; - - ${particle_id_t} nqbx_centers = 0; - for(${particle_id_t} iparticle = start; iparticle < end; iparticle++) - nqbx_centers += global_qbx_center_weight[iparticle]; - - nqbx_centers_itgt_box[i] = nqbx_centers; - """).render( - box_id_t=dtype_to_ctype(box_id_dtype), - particle_id_t=dtype_to_ctype(particle_id_dtype) - ), - name="count_global_qbx_centers" - ) + knl = self._fill_array_with_index_knl(actx, idx_dtype, array_dtype) + knl(index, array, value, queue=actx.queue) + + def count_global_qbx_centers_knl(self, + actx: PyOpenCLArrayContext, box_id_dtype, particle_id_dtype): + from pyopencl.elementwise import ElementwiseKernel + from pyopencl.tools import dtype_to_ctype + + @memoize_in(actx, ( + QBXCostModel.count_global_qbx_centers_knl, + box_id_dtype, particle_id_dtype)) + def get_kernel(): + return ElementwiseKernel( + actx.context, + Template(r""" + ${particle_id_t} *nqbx_centers_itgt_box, + ${particle_id_t} *global_qbx_center_weight, + ${box_id_t} *target_boxes, + ${particle_id_t} *box_target_starts, + ${particle_id_t} *box_target_counts_nonchild + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + Template(r""" + ${box_id_t} global_box_id = target_boxes[i]; + ${particle_id_t} start = box_target_starts[global_box_id]; + ${particle_id_t} end = start + box_target_counts_nonchild[ + global_box_id + ]; + + ${particle_id_t} nqbx_centers = 0; + for(${particle_id_t} iparticle = start; iparticle < end; iparticle++) + nqbx_centers += global_qbx_center_weight[iparticle]; + + nqbx_centers_itgt_box[i] = nqbx_centers; + """).render( # noqa: E501 + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + name="count_global_qbx_centers" + ) + + return get_kernel() - def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None): + def get_nqbx_centers_per_tgt_box(self, + actx: PyOpenCLArrayContext, geo_data, weights=None): """ - :arg queue: a :class:`pyopencl.CommandQueue` object. :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. - :arg weights: a :class:`pyopencl.array.Array` of shape ``(ncenters,)`` with - particle_id_dtype, the weight of each center in user order. - :return: a :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)`` with - type *particle_id_dtype* where the *i*th entry represents the number of - `geo_data.global_qbx_centers` in ``target_boxes[i]``, optionally weighted - by *weights*. + :arg weights: an array of shape ``(ncenters,)`` with ``particle_id_dtype``, + the weight of each center in user order. + :return: an array of shape ``(ntarget_boxes,)`` with type + *particle_id_dtype* where the *i*th entry represents the number of + `geo_data.global_qbx_centers` in ``target_boxes[i]``, optionally + weighted by *weights*. """ traversal = geo_data.traversal() tree = geo_data.tree() @@ -672,15 +683,16 @@ def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None): ncenters = geo_data.ncenters # Build a mask (weight) of whether a target is a global qbx center - global_qbx_centers_tree_order = take( - tree.sorted_target_ids, global_qbx_centers, queue=queue + global_qbx_centers_tree_order = ( + actx.thaw(tree.sorted_target_ids)[global_qbx_centers] ) - global_qbx_center_weight = cl.array.zeros( - queue, tree.ntargets, dtype=tree.particle_id_dtype + + global_qbx_center_weight = actx.zeros( + tree.ntargets, dtype=tree.particle_id_dtype ) self._fill_array_with_index( - queue, global_qbx_center_weight, global_qbx_centers_tree_order, 1 + actx, global_qbx_center_weight, global_qbx_centers_tree_order, 1 ) if weights is not None: @@ -690,12 +702,12 @@ def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None): # Each target box enumerate its target list and add the weight of global # qbx centers ntarget_boxes = len(traversal.target_boxes) - nqbx_centers_itgt_box = cl.array.empty( - queue, ntarget_boxes, dtype=tree.particle_id_dtype + nqbx_centers_itgt_box = actx.zeros( + ntarget_boxes, dtype=tree.particle_id_dtype ) count_global_qbx_centers_knl = self.count_global_qbx_centers_knl( - queue.context, tree.box_id_dtype, tree.particle_id_dtype + actx, tree.box_id_dtype, tree.particle_id_dtype ) count_global_qbx_centers_knl( nqbx_centers_itgt_box, @@ -703,61 +715,71 @@ def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None): traversal.target_boxes, tree.box_target_starts, tree.box_target_counts_nonchild, - queue=queue + queue=actx.queue ) return nqbx_centers_itgt_box - def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, + def process_form_qbxl(self, actx: PyOpenCLArrayContext, + geo_data, p2qbxl_cost, ndirect_sources_per_target_box): - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(actx, geo_data) return (nqbx_centers_itgt_box * ndirect_sources_per_target_box * p2qbxl_cost) - @memoize_method - def process_m2qbxl_knl(self, context, box_id_dtype, particle_id_dtype): - return ElementwiseKernel( - context, - Template(r""" - ${box_id_t} *idx_to_itgt_box, - ${particle_id_t} *nqbx_centers_itgt_box, - ${box_id_t} *ssn_starts, - double *nm2qbxl, - double m2qbxl_cost - """).render( - box_id_t=dtype_to_ctype(box_id_dtype), - particle_id_t=dtype_to_ctype(particle_id_dtype) - ), - Template(r""" - // get the index of current box in target_boxes - ${box_id_t} itgt_box = idx_to_itgt_box[i]; - // get the number of expansion centers in current box - ${particle_id_t} nqbx_centers = nqbx_centers_itgt_box[itgt_box]; - // get the number of list 3 boxes of the current box in a particular - // level - ${box_id_t} nlist3_boxes = ssn_starts[i + 1] - ssn_starts[i]; - // calculate the cost - nm2qbxl[itgt_box] += (nqbx_centers * nlist3_boxes * m2qbxl_cost); - """).render( - box_id_t=dtype_to_ctype(box_id_dtype), - particle_id_t=dtype_to_ctype(particle_id_dtype) - ), - name="process_m2qbxl" - ) + def process_m2qbxl_knl(self, + actx: PyOpenCLArrayContext, box_id_dtype, particle_id_dtype): + from pyopencl.elementwise import ElementwiseKernel + from pyopencl.tools import dtype_to_ctype + + @memoize_in(actx, ( + QBXCostModel.process_m2qbxl_knl, + box_id_dtype, particle_id_dtype)) + def get_kernel(): + return ElementwiseKernel( + actx.context, + Template(r""" + ${box_id_t} *idx_to_itgt_box, + ${particle_id_t} *nqbx_centers_itgt_box, + ${box_id_t} *ssn_starts, + double *nm2qbxl, + double m2qbxl_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + Template(r""" + // get the index of current box in target_boxes + ${box_id_t} itgt_box = idx_to_itgt_box[i]; + // get the number of expansion centers in current box + ${particle_id_t} nqbx_centers = nqbx_centers_itgt_box[itgt_box]; + // get the number of list 3 boxes of the current box in a particular + // level + ${box_id_t} nlist3_boxes = ssn_starts[i + 1] - ssn_starts[i]; + // calculate the cost + nm2qbxl[itgt_box] += (nqbx_centers * nlist3_boxes * m2qbxl_cost); + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + name="process_m2qbxl" + ) + + return get_kernel() - def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): + def process_m2qbxl(self, actx: PyOpenCLArrayContext, geo_data, m2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() ntarget_boxes = len(traversal.target_boxes) - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(actx, geo_data) process_m2qbxl_knl = self.process_m2qbxl_knl( - queue.context, tree.box_id_dtype, tree.particle_id_dtype + actx, tree.box_id_dtype, tree.particle_id_dtype ) - nm2qbxl = cl.array.zeros(queue, ntarget_boxes, dtype=np.float64) + nm2qbxl = actx.zeros(ntarget_boxes, dtype=np.float64) for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): process_m2qbxl_knl( @@ -765,45 +787,43 @@ def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): nqbx_centers_itgt_box, ssn.starts, nm2qbxl, - m2qbxl_cost[isrc_level].get(queue).reshape(-1)[0], - queue=queue + actx.to_numpy(m2qbxl_cost[isrc_level]).reshape(-1)[0], + queue=actx.queue ) return nm2qbxl - def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): + def process_l2qbxl(self, actx: PyOpenCLArrayContext, geo_data, l2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(actx, geo_data) # l2qbxl_cost_itgt_box = l2qbxl_cost[tree.box_levels[traversal.target_boxes]] - l2qbxl_cost_itgt_box = take( - l2qbxl_cost, - take(tree.box_levels, traversal.target_boxes, queue=queue), - queue=queue - ) + tgt_box_levels = actx.thaw(tree.box_levels)[traversal.target_boxes] + l2qbxl_cost_itgt_box = actx.thaw(l2qbxl_cost)[tgt_box_levels] return nqbx_centers_itgt_box * l2qbxl_cost_itgt_box - def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, actx: PyOpenCLArrayContext, geo_data, qbxl2p_cost): center_to_targets_starts = geo_data.center_to_tree_targets().starts - center_to_targets_starts = center_to_targets_starts.with_queue(queue) + center_to_targets_starts = actx.thaw(center_to_targets_starts) weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( - queue, geo_data, weights=weights + actx, geo_data, weights=weights ) return nqbx_targets_itgt_box * qbxl2p_cost - def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, actx: PyOpenCLArrayContext, + geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): center_to_targets_starts = geo_data.center_to_tree_targets().starts - center_to_targets_starts = center_to_targets_starts.with_queue(queue) + center_to_targets_starts = actx.thaw(center_to_targets_starts) weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( - queue, geo_data, weights=weights + actx, geo_data, weights=weights ) return (nqbx_targets_itgt_box @@ -811,13 +831,9 @@ def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, * p2p_tsqbx_cost) def qbx_cost_factors_for_kernels_from_model( - self, queue, nlevels, xlat_cost, context): - if not isinstance(queue, cl.CommandQueue): - raise TypeError( - "An OpenCL command queue must be supplied for cost model") - + self, actx: PyOpenCLArrayContext, nlevels, xlat_cost, context): return AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( - self, queue, nlevels, xlat_cost, context + self, actx, nlevels, xlat_cost, context ) @@ -834,7 +850,7 @@ def __init__( """ _PythonFMMCostModel.__init__(self, translation_cost_model_factory) - def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, + def process_form_qbxl(self, actx, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() @@ -848,7 +864,7 @@ def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, return np2qbxl * p2qbxl_cost - def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, actx, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): center_to_targets_starts = geo_data.center_to_tree_targets().starts global_qbx_centers = geo_data.global_qbx_centers() @@ -865,7 +881,7 @@ def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, return neval_tsqbx * p2p_tsqbx_cost - def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): + def process_m2qbxl(self, actx, geo_data, m2qbxl_cost): traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() @@ -897,7 +913,7 @@ def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): return nm2qbxl - def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): + def process_l2qbxl(self, actx, geo_data, l2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() @@ -913,7 +929,7 @@ def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): return nl2qbxl - def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, actx, geo_data, qbxl2p_cost): traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() center_to_targets_starts = geo_data.center_to_tree_targets().starts @@ -929,7 +945,7 @@ def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): return neval_qbxl * qbxl2p_cost - def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, + def qbx_cost_per_box(self, actx, geo_data, kernel, kernel_arguments, calibration_params): """This function transfers *geo_data* to host if necessary """ @@ -941,10 +957,10 @@ def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, geo_data = ToHostTransferredGeoDataWrapper(geo_data) return AbstractQBXCostModel.qbx_cost_per_box( - self, queue, geo_data, kernel, kernel_arguments, calibration_params + self, actx, geo_data, kernel, kernel_arguments, calibration_params ) - def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, + def qbx_cost_per_stage(self, actx, geo_data, kernel, kernel_arguments, calibration_params): """This function additionally transfers geo_data to host if necessary """ @@ -956,11 +972,11 @@ def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, geo_data = ToHostTransferredGeoDataWrapper(geo_data) return AbstractQBXCostModel.qbx_cost_per_stage( - self, queue, geo_data, kernel, kernel_arguments, calibration_params + self, actx, geo_data, kernel, kernel_arguments, calibration_params ) def qbx_cost_factors_for_kernels_from_model( - self, queue, nlevels, xlat_cost, context): + self, actx, nlevels, xlat_cost, context): return AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( self, None, nlevels, xlat_cost, context ) diff --git a/pytential/qbx/direct.py b/pytential/qbx/direct.py index 78e26375f..6eea9ba62 100644 --- a/pytential/qbx/direct.py +++ b/pytential/qbx/direct.py @@ -23,21 +23,28 @@ THE SOFTWARE. """ +from typing import TYPE_CHECKING + import numpy as np import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION +from pytools import obj_array from sumpy.qbx import LayerPotentialBase -from pytential.version import PYTENTIAL_KERNEL_VERSION + +if TYPE_CHECKING: + from pytential.array_context import PyOpenCLArrayContext # {{{ qbx applier on a target/center subset class LayerPotentialOnTargetAndCenterSubset(LayerPotentialBase): - default_name = "qbx_tgt_ctr_subset" + @property + def default_name(self): + return "qbx_tgt_ctr_subset" def get_cache_key(self): + from pytential.version import PYTENTIAL_KERNEL_VERSION return (*super().get_cache_key(), PYTENTIAL_KERNEL_VERSION) def get_kernel(self): @@ -70,9 +77,11 @@ def get_kernel(self): for i in range(self.strength_count)] + [lp.GlobalArg(f"result_{i}", self.value_dtypes[i], shape="ntargets_total", order="C") - for i in range(len(self.target_kernels))]) + for i in range(self.nresults)]) - loopy_knl = lp.make_kernel([ + from pytential.array_context import make_loopy_program + + loopy_knl = make_loopy_program([ "{[itgt_local]: 0 <= itgt_local < ntargets}", "{[isrc]: 0 <= isrc < nsources}", "{[idim]: 0 <= idim < dim}" @@ -96,13 +105,13 @@ def get_kernel(self): simul_reduce(sum, isrc, pair_result_{i}) \ {{inames=itgt_local}} """.format(i=iknl) - for iknl in range(len(self.target_kernels))] + for iknl in range(self.nresults)] + ["end"], - arguments, + kernel_data=arguments, name=self.name, assumptions="ntargets>=1 and nsources>=1", fixed_parameters={"dim": self.dim}, - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for expn in self.source_kernels + self.target_kernels: @@ -110,10 +119,26 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, targets, sources, centers, strengths, expansion_radii, + def get_optimized_kernel(self, *, + is_cpu: bool, + targets_is_obj_array: bool, + sources_is_obj_array: bool, + centers_is_obj_array: bool): + return super().get_optimized_kernel( + is_cpu=is_cpu, + targets_is_obj_array=targets_is_obj_array, + sources_is_obj_array=sources_is_obj_array, + centers_is_obj_array=centers_is_obj_array, + itgt_name="itgt_local") + + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, centers, strengths, expansion_radii, **kwargs): + from sumpy.array_context import is_cl_cpu from sumpy.tools import is_obj_array_like + knl = self.get_cached_kernel_executor( + is_cpu=is_cl_cpu(actx), targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) @@ -121,13 +146,12 @@ def __call__(self, queue, targets, sources, centers, strengths, expansion_radii, for i, dens in enumerate(strengths): kwargs[f"strength_{i}"] = dens - return knl(queue, sources=sources, targets=targets, center=centers, + result = actx.call_loopy( + knl, + sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) - def get_optimized_kernel(self, - targets_is_obj_array, sources_is_obj_array, centers_is_obj_array): - return LayerPotentialBase.get_optimized_kernel(self, targets_is_obj_array, - sources_is_obj_array, centers_is_obj_array, itgt_name="itgt_local") + return obj_array.new_1d([result[f"result_{i}"] for i in range(self.nresults)]) # }}} diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index 258aecba8..8f66b59b8 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -29,13 +29,10 @@ from typing_extensions import override -import pyopencl.array as cl_array -from boxtree.timing import TimingRecorder -from pytools import ProcessLogger, log_process, memoize_method, obj_array +from pytools import ProcessLogger, log_process, memoize_method from sumpy.expansion import ExpansionFactoryBase from sumpy.fmm import ( SumpyExpansionWrangler, - SumpyTimingFuture, SumpyTreeIndependentDataForWrangler, ) @@ -48,6 +45,7 @@ from sumpy.expansion import LocalExpansionBase from sumpy.kernel import Kernel + from pytential.array_context import PyOpenCLArrayContext logger = logging.getLogger(__name__) @@ -72,14 +70,14 @@ def get_qbx_local_expansion_class(self, class QBXSumpyTreeIndependentDataForWrangler(SumpyTreeIndependentDataForWrangler): def __init__(self, - cl_context, + actx: PyOpenCLArrayContext, multipole_expansion_factory, local_expansion_factory, qbx_local_expansion_factory, target_kernels, source_kernels): super().__init__( - cl_context, multipole_expansion_factory, local_expansion_factory, + actx, multipole_expansion_factory, local_expansion_factory, target_kernels=target_kernels, source_kernels=source_kernels) self.qbx_local_expansion_factory: QBXExpansionFactory = ( @@ -91,26 +89,27 @@ def qbx_local_expansion(self, order): @memoize_method def p2qbxl(self, order): - return P2QBXLFromCSR(self.cl_context, - self.qbx_local_expansion(order), kernels=self.source_kernels) + return P2QBXLFromCSR( + self.qbx_local_expansion(order), + kernels=self.source_kernels) @memoize_method def m2qbxl(self, source_order, target_order): - return M2QBXL(self.cl_context, - self.multipole_expansion_factory(source_order), - self.qbx_local_expansion_factory(target_order)) + return M2QBXL( + self.multipole_expansion_factory(source_order), + self.qbx_local_expansion_factory(target_order)) @memoize_method def l2qbxl(self, source_order, target_order): - return L2QBXL(self.cl_context, - self.local_expansion_factory(source_order), - self.qbx_local_expansion_factory(target_order)) + return L2QBXL( + self.local_expansion_factory(source_order), + self.qbx_local_expansion_factory(target_order)) @memoize_method def qbxl2p(self, order): - return QBXL2P(self.cl_context, - self.qbx_local_expansion_factory(order), - self.target_kernels) + return QBXL2P( + self.qbx_local_expansion_factory(order), + self.target_kernels) @property def wrangler_cls(self): @@ -151,7 +150,7 @@ def __init__(self, tree_indep, geo_data, dtype, actx = geo_data._setup_actx translation_classes_data, _ = translation_classes_builder(actx)( - actx.queue, traversal, traversal.tree, is_translation_per_level=True) + actx, traversal, traversal.tree, is_translation_per_level=True) super().__init__( tree_indep, traversal, @@ -165,35 +164,29 @@ def __init__(self, tree_indep, geo_data, dtype, # {{{ data vector utilities @override - def output_zeros(self, template_ary: cl_array.Array): + def output_zeros(self, actx: PyOpenCLArrayContext): """This ought to be called ``non_qbx_output_zeros``, but since it has to override the superclass's behavior to integrate seamlessly, it needs to be called just :meth:`output_zeros`. """ + from pytools import obj_array nqbtl = self.geo_data.non_qbx_box_target_lists() - return obj_array.new_1d([ - cl_array.zeros( - template_ary.queue, - nqbtl.nfiltered_targets, - dtype=self.dtype) - for k in self.tree_indep.target_kernels]) + actx.zeros(nqbtl.nfiltered_targets, dtype=self.dtype) + for k in self.tree_indep.target_kernels + ]) - def full_output_zeros(self, template_ary): + def full_output_zeros(self, actx: PyOpenCLArrayContext): # The superclass generates a full field of zeros, for all # (not just non-QBX) targets. - return super().output_zeros(template_ary) + return super().output_zeros(actx) - def qbx_local_expansion_zeros(self, template_ary): + def qbx_local_expansion_zeros(self, actx: PyOpenCLArrayContext): order = self.qbx_order qbx_l_expn = self.tree_indep.qbx_local_expansion(order) - return cl_array.zeros( - template_ary.queue, - (self.geo_data.ncenters, - len(qbx_l_expn)), - dtype=self.dtype) + return actx.zeros((self.geo_data.ncenters, len(qbx_l_expn)), self.dtype) def reorder_sources(self, source_array): return source_array[self.tree.user_source_ids] @@ -227,15 +220,12 @@ def box_target_list_kwargs(self): # {{{ qbx-related @log_process(logger) - def form_global_qbx_locals(self, src_weight_vecs): - queue = src_weight_vecs[0].queue - - local_exps = self.qbx_local_expansion_zeros(src_weight_vecs[0]) - events = [] + def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): + local_exps = self.qbx_local_expansion_zeros(actx) geo_data = self.geo_data if len(geo_data.global_qbx_centers()) == 0: - return (local_exps, SumpyTimingFuture(queue, events)) + return local_exps traversal = geo_data.traversal() @@ -247,8 +237,8 @@ def form_global_qbx_locals(self, src_weight_vecs): p2qbxl = self.tree_indep.p2qbxl(self.qbx_order) - evt, (result,) = p2qbxl( - queue, + result = p2qbxl( + actx, global_qbx_centers=geo_data.global_qbx_centers(), qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), qbx_centers=geo_data.flat_centers(), @@ -260,22 +250,18 @@ def form_global_qbx_locals(self, src_weight_vecs): qbx_expansions=local_exps, **kwargs) - - events.append(evt) assert local_exps is result - result.add_event(evt) - return (result, SumpyTimingFuture(queue, events)) + return result @log_process(logger) - def translate_box_multipoles_to_qbx_local(self, multipole_exps): - queue = multipole_exps.queue - qbx_expansions = self.qbx_local_expansion_zeros(multipole_exps) - events = [] + def translate_box_multipoles_to_qbx_local( + self, actx: PyOpenCLArrayContext, multipole_exps): + qbx_expansions = self.qbx_local_expansion_zeros(actx) geo_data = self.geo_data if geo_data.ncenters == 0: - return (qbx_expansions, SumpyTimingFuture(queue, events)) + return qbx_expansions traversal = geo_data.traversal() @@ -289,7 +275,7 @@ def translate_box_multipoles_to_qbx_local(self, multipole_exps): source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(multipole_exps, isrc_level) - evt, (qbx_expansions_res,) = m2qbxl(queue, + qbx_expansions_res = m2qbxl(actx, qbx_center_to_target_box_source_level=( geo_data.qbx_center_to_target_box_source_level(isrc_level) ), @@ -311,27 +297,21 @@ def translate_box_multipoles_to_qbx_local(self, multipole_exps): **self.kernel_extra_kwargs) - events.append(evt) - wait_for = [evt] assert qbx_expansions_res is qbx_expansions - qbx_expansions.add_event(evt) - - return (qbx_expansions, SumpyTimingFuture(queue, events)) + return qbx_expansions @log_process(logger) - def translate_box_local_to_qbx_local(self, local_exps): - queue = local_exps.queue - qbx_expansions = self.qbx_local_expansion_zeros(local_exps) + def translate_box_local_to_qbx_local( + self, actx: PyOpenCLArrayContext, local_exps): + qbx_expansions = self.qbx_local_expansion_zeros(actx) geo_data = self.geo_data - events = [] if geo_data.ncenters == 0: - return (qbx_expansions, SumpyTimingFuture(queue, events)) + return qbx_expansions trav = geo_data.traversal() - wait_for = local_exps.events for isrc_level in range(geo_data.tree().nlevels): @@ -342,8 +322,8 @@ def translate_box_local_to_qbx_local(self, local_exps): target_level_start_ibox, target_locals_view = \ self.local_expansions_view(local_exps, isrc_level) - evt, (qbx_expansions_res,) = l2qbxl( - queue, + qbx_expansions_res = l2qbxl( + actx, qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), target_boxes=trav.target_boxes, target_base_ibox=target_level_start_ibox, @@ -361,30 +341,23 @@ def translate_box_local_to_qbx_local(self, local_exps): **self.kernel_extra_kwargs) - events.append(evt) - wait_for = [evt] assert qbx_expansions_res is qbx_expansions - qbx_expansions.add_event(evt) - - return (qbx_expansions, SumpyTimingFuture(queue, events)) + return qbx_expansions @log_process(logger) - def eval_qbx_expansions(self, qbx_expansions): - queue = qbx_expansions.queue - pot = self.full_output_zeros(qbx_expansions) + def eval_qbx_expansions(self, actx: PyOpenCLArrayContext, qbx_expansions): + pot = self.full_output_zeros(actx) geo_data = self.geo_data - events = [] if len(geo_data.global_qbx_centers()) == 0: - return (pot, SumpyTimingFuture(queue, events)) + return pot ctt = geo_data.center_to_tree_targets() - qbxl2p = self.tree_indep.qbxl2p(self.qbx_order) - _, pot_res = qbxl2p(queue, + pot_res = qbxl2p(actx, qbx_centers=geo_data.flat_centers(), qbx_expansion_radii=geo_data.flat_expansion_radii(), @@ -403,24 +376,23 @@ def eval_qbx_expansions(self, qbx_expansions): for pot_i, pot_res_i in zip(pot, pot_res, strict=True): assert pot_i is pot_res_i - return (pot, SumpyTimingFuture(queue, events)) + return pot @log_process(logger) - def eval_target_specific_qbx_locals(self, src_weight_vecs): - template_ary = src_weight_vecs[0] - return (self.full_output_zeros(template_ary), - SumpyTimingFuture(template_ary.queue, events=())) + def eval_target_specific_qbx_locals( + self, actx: PyOpenCLArrayContext, src_weight_vecs): + return self.full_output_zeros(actx) # }}} -def translation_classes_builder(actx): +def translation_classes_builder(actx: PyOpenCLArrayContext): from pytools import memoize_in @memoize_in(actx, (QBXExpansionWrangler, translation_classes_builder)) def make_container(): from boxtree.translation_classes import TranslationClassesBuilder - return TranslationClassesBuilder(actx.context) + return TranslationClassesBuilder(actx) return make_container() @@ -429,31 +401,23 @@ def make_container(): # {{{ FMM top-level -def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): +def drive_fmm(actx: PyOpenCLArrayContext, expansion_wrangler, src_weight_vecs): """Top-level driver routine for the QBX fast multipole calculation. - :arg geo_data: A :class:`pytential.qbx.geometry.QBXFMMGeometryData` instance. :arg expansion_wrangler: An object exhibiting the :class:`boxtree.fmm.ExpansionWranglerInterface`. :arg src_weight_vecs: A sequence of source 'density/weights/charges'. Passed unmodified to *expansion_wrangler*. - :arg timing_data: Either *None* or a dictionary that collects - timing data. Returns the potentials computed by *expansion_wrangler*. See also :func:`boxtree.fmm.drive_fmm`. """ wrangler = expansion_wrangler - geo_data = wrangler.geo_data traversal = wrangler.traversal tree = traversal.tree - template_ary = src_weight_vecs[0] - - recorder = TimingRecorder() - # Interface guidelines: Attributes of the tree are assumed to be known # to the expansion wrangler and should not be passed. @@ -464,49 +428,45 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): # {{{ construct local multipoles - mpole_exps, timing_future = wrangler.form_multipoles( + mpole_exps = wrangler.form_multipoles( + actx, traversal.level_start_source_box_nrs, traversal.source_boxes, src_weight_vecs) - recorder.add("form_multipoles", timing_future) - # }}} # {{{ propagate multipoles upward - mpole_exps, timing_future = wrangler.coarsen_multipoles( + mpole_exps = wrangler.coarsen_multipoles( + actx, traversal.level_start_source_parent_box_nrs, traversal.source_parent_boxes, mpole_exps) - recorder.add("coarsen_multipoles", timing_future) - # }}} # {{{ direct evaluation from neighbor source boxes ("list 1") - non_qbx_potentials, timing_future = wrangler.eval_direct( + non_qbx_potentials = wrangler.eval_direct( + actx, traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_weight_vecs) - recorder.add("eval_direct", timing_future) - # }}} # {{{ translate separated siblings' ("list 2") mpoles to local - local_exps, timing_future = wrangler.multipole_to_local( + local_exps = wrangler.multipole_to_local( + actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps) - recorder.add("multipole_to_local", timing_future) - # }}} # {{{ evaluate sep. smaller mpoles ("list 3") at particles @@ -514,13 +474,12 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): # (the point of aiming this stage at particles is specifically to keep its # contribution *out* of the downward-propagating local expansions) - mpole_result, timing_future = wrangler.eval_multipoles( + mpole_result = wrangler.eval_multipoles( + actx, traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps) - recorder.add("eval_multipoles", timing_future) - non_qbx_potentials = non_qbx_potentials + mpole_result # assert that list 3 close has been merged into list 1 @@ -530,15 +489,14 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): # {{{ form locals for separated bigger source boxes ("list 4") - local_result, timing_future = wrangler.form_locals( + local_result = wrangler.form_locals( + actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weight_vecs) - recorder.add("form_locals", timing_future) - local_exps = local_exps + local_result # assert that list 4 close has been merged into list 1 @@ -548,24 +506,22 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): # {{{ propagate local_exps downward - local_exps, timing_future = wrangler.refine_locals( + local_exps = wrangler.refine_locals( + actx, traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, local_exps) - recorder.add("refine_locals", timing_future) - # }}} # {{{ evaluate locals - local_result, timing_future = wrangler.eval_locals( + local_result = wrangler.eval_locals( + actx, traversal.level_start_target_box_nrs, traversal.target_boxes, local_exps) - recorder.add("eval_locals", timing_future) - non_qbx_potentials = non_qbx_potentials + local_result # }}} @@ -577,42 +533,34 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): # via unified List 1). Which one is used depends on the wrangler. If one of # them is unused the corresponding output entries will be zero. - qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weight_vecs) + qbx_expansions = ( + wrangler.form_global_qbx_locals(actx, src_weight_vecs)) - recorder.add("form_global_qbx_locals", timing_future) - - local_result, timing_future = ( - wrangler.translate_box_multipoles_to_qbx_local(mpole_exps)) - - recorder.add("translate_box_multipoles_to_qbx_local", timing_future) + local_result = ( + wrangler.translate_box_multipoles_to_qbx_local(actx, mpole_exps)) qbx_expansions = qbx_expansions + local_result - local_result, timing_future = ( - wrangler.translate_box_local_to_qbx_local(local_exps)) - - recorder.add("translate_box_local_to_qbx_local", timing_future) + local_result = ( + wrangler.translate_box_local_to_qbx_local(actx, local_exps)) qbx_expansions = qbx_expansions + local_result - qbx_potentials, timing_future = wrangler.eval_qbx_expansions(qbx_expansions) + qbx_potentials = ( + wrangler.eval_qbx_expansions(actx, qbx_expansions)) - recorder.add("eval_qbx_expansions", timing_future) - - ts_result, timing_future = \ - wrangler.eval_target_specific_qbx_locals(src_weight_vecs) + ts_result = ( + wrangler.eval_target_specific_qbx_locals(actx, src_weight_vecs)) qbx_potentials = qbx_potentials + ts_result - recorder.add("eval_target_specific_qbx_locals", timing_future) - # }}} # {{{ reorder potentials nqbtl = geo_data.non_qbx_box_target_lists() - all_potentials_in_tree_order = wrangler.full_output_zeros(template_ary) + all_potentials_in_tree_order = wrangler.full_output_zeros(actx) for ap_i, nqp_i in zip(all_potentials_in_tree_order, non_qbx_potentials, strict=False): @@ -623,7 +571,7 @@ def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None): def reorder_and_finalize_potentials(x): # "finalize" gives host FMMs (like FMMlib) a chance to turn the # potential back into a CL array. - return wrangler.finalize_potentials(x[tree.sorted_target_ids], template_ary) + return wrangler.finalize_potentials(actx, x[tree.sorted_target_ids]) from pytools import obj_array result = obj_array.vectorize( @@ -633,8 +581,6 @@ def reorder_and_finalize_potentials(x): fmm_proc.done() - if timing_data is not None: - timing_data.update(recorder.summarize()) return result # }}} diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index bd7bd0a02..0dc59f871 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -24,17 +24,15 @@ """ import logging +from typing import TYPE_CHECKING, Any import numpy as np -import pyopencl as cl -import pyopencl.array from boxtree.pyfmmlib_integration import ( FMMLibExpansionWrangler, FMMLibTreeIndependentDataForWrangler, Kernel, ) -from boxtree.timing import return_timing_data from pytools import log_process, memoize_method from sumpy.kernel import ( AxisTargetDerivative, @@ -46,15 +44,18 @@ import pytential.qbx.target_specific as ts +if TYPE_CHECKING: + from pytential.array_context import PyOpenCLArrayContext + logger = logging.getLogger(__name__) class QBXFMMLibTreeIndependentDataForWrangler(FMMLibTreeIndependentDataForWrangler): - def __init__(self, cl_context, *, + def __init__(self, actx: PyOpenCLArrayContext, *, multipole_expansion_factory, local_expansion_factory, qbx_local_expansion_factory, target_kernels, _use_target_specific_qbx): - self.cl_context = cl_context + self._setup_actx = actx self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory self.qbx_local_expansion_factory = qbx_local_expansion_factory @@ -65,7 +66,7 @@ def __init__(self, cl_context, *, # {{{ digest target_kernels ifgrad = False - outputs = [] + outputs: list[tuple[Any, ...]] = [] source_deriv_names = [] k_names = [] @@ -177,12 +178,11 @@ def __init__(self, tree_indep, geo_data, dtype, dipole_vec = None if tree_indep.source_deriv_name is not None: - with cl.CommandQueue(tree_indep.cl_context) as queue: - dipole_vec = np.array([ - d_i.get(queue=queue) - for d_i in source_extra_kwargs[ - tree_indep.source_deriv_name]], - order="F") + dipole_vec = np.array([ + tree_indep._setup_actx.to_numpy(d_i) + for d_i in source_extra_kwargs[ + tree_indep.source_deriv_name]], + order="F") def inner_fmm_level_to_order(tree, level): if helmholtz_k == 0: @@ -205,6 +205,10 @@ def inner_fmm_level_to_order(tree, level): fmm_level_to_order=inner_fmm_level_to_order, rotation_data=geo_data) + @property + def _setup_actx(self): + return self.tree_indep._setup_actx + # {{{ data vector helpers def output_zeros(self): @@ -220,7 +224,7 @@ def output_zeros(self): np.zeros(nqbtl.nfiltered_targets, self.tree_indep.dtype) for k in self.tree_indep.outputs]) - def full_output_zeros(self, template_ary): + def full_output_zeros(self, actx: PyOpenCLArrayContext): """This includes QBX and non-QBX targets.""" from pytools import obj_array @@ -229,8 +233,8 @@ def full_output_zeros(self, template_ary): for k in self.tree_indep.outputs]) def reorder_sources(self, source_array): - if isinstance(source_array, cl.array.Array): - source_array = source_array.get() + if isinstance(source_array, self._setup_actx.array_types): + source_array = self._setup_actx.to_numpy(source_array) return super().reorder_sources(source_array) @@ -288,8 +292,7 @@ def qbx_local_expansion_zeros(self): # {{{ p2qbxl @log_process(logger) - @return_timing_data - def form_global_qbx_locals(self, src_weight_vecs): + def form_global_qbx_locals(self, actx: PyOpenCLArrayContext, src_weight_vecs): src_weights, = src_weight_vecs if self.tree_indep.using_tsqbx: return self.qbx_local_expansion_zeros() @@ -345,8 +348,7 @@ def form_global_qbx_locals(self, src_weight_vecs): # {{{ m2qbxl @log_process(logger) - @return_timing_data - def translate_box_multipoles_to_qbx_local(self, multipole_exps): + def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): qbx_exps = self.qbx_local_expansion_zeros() geo_data = self.geo_data @@ -458,8 +460,7 @@ def translate_box_multipoles_to_qbx_local(self, multipole_exps): # }}} @log_process(logger) - @return_timing_data - def translate_box_local_to_qbx_local(self, local_exps): + def translate_box_local_to_qbx_local(self, actx, local_exps): qbx_expansions = self.qbx_local_expansion_zeros() geo_data = self.geo_data @@ -551,9 +552,8 @@ def translate_box_local_to_qbx_local(self, local_exps): return qbx_expansions @log_process(logger) - @return_timing_data - def eval_qbx_expansions(self, qbx_expansions): - output = self.full_output_zeros(template_ary=qbx_expansions) + def eval_qbx_expansions(self, actx, qbx_expansions): + output = self.full_output_zeros(actx) geo_data = self.geo_data ctt = geo_data.center_to_tree_targets() @@ -586,11 +586,10 @@ def eval_qbx_expansions(self, qbx_expansions): return output @log_process(logger) - @return_timing_data - def eval_target_specific_qbx_locals(self, src_weight_vecs): + def eval_target_specific_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs if not self.tree_indep.using_tsqbx: - return self.full_output_zeros(template_ary=src_weights) + return self.full_output_zeros(actx) geo_data = self.geo_data trav = geo_data.traversal() @@ -635,14 +634,14 @@ def eval_target_specific_qbx_locals(self, src_weight_vecs): pot=pot, grad=grad) - output = self.full_output_zeros(template_ary=src_weights) + output = self.full_output_zeros(actx) self.add_potgrad_onto_output(output, slice(None), pot, grad) return output - def finalize_potentials(self, potential, template_ary): - potential = super().finalize_potentials(potential, template_ary) - return cl.array.to_device(template_ary.queue, potential) + def finalize_potentials(self, actx, potential): + potential = super().finalize_potentials(actx, potential) + return actx.from_numpy(potential) # }}} diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py index bd5cf39ef..e5b94f380 100644 --- a/pytential/qbx/geometry.py +++ b/pytential/qbx/geometry.py @@ -23,21 +23,20 @@ THE SOFTWARE. """ - import logging +from dataclasses import dataclass from typing import TYPE_CHECKING, cast import numpy as np from cgen import Enum import loopy as lp -from arraycontext import PyOpenCLArrayContext, flatten +from arraycontext import Array, PyOpenCLArrayContext, flatten from boxtree.pyfmmlib_integration import FMMLibRotationDataInterface -from boxtree.tools import DeviceDataRecord -from loopy.version import MOST_RECENT_LANGUAGE_VERSION from meshmode.dof_array import DOFArray from pytools import log_process, memoize_in, memoize_method +from pytential.array_context import dataclass_array_container from pytential.qbx.utils import TreeCodeContainerMixin @@ -74,9 +73,9 @@ Subordinate data structures ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: TargetInfo() +.. autoclass:: TargetInfo -.. autoclass:: CenterToTargetList() +.. autoclass:: CenterToTargetList Enums of special values ^^^^^^^^^^^^^^^^^^^^^^^ @@ -149,7 +148,7 @@ def copy_targets_kernel(self): targets[dim, i] = points[dim, i] """, default_offset=lp.auto, name="copy_targets", - lang_version=MOST_RECENT_LANGUAGE_VERSION) + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) knl = lp.fix_parameters(knl, ndims=self.ambient_dim) @@ -165,7 +164,7 @@ def copy_targets_kernel(self): def build_traversal(self): from boxtree.traversal import FMMTraversalBuilder return FMMTraversalBuilder( - self._setup_actx.context, + self._setup_actx, well_sep_is_n_away=self._well_sep_is_n_away, from_sep_smaller_crit=self._from_sep_smaller_crit, ) @@ -208,7 +207,7 @@ def qbx_center_to_target_box_lookup(self, particle_id_dtype, box_id_dtype): ], name="qbx_center_to_target_box_lookup", silenced_warnings="write_race(tgt_write)", - lang_version=MOST_RECENT_LANGUAGE_VERSION) + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "ibox", 128, inner_tag="l.0", outer_tag="g.0") @@ -219,7 +218,7 @@ def qbx_center_to_target_box_lookup(self, particle_id_dtype, box_id_dtype): @memoize_method def build_leaf_to_ball_lookup(self): from boxtree.area_query import LeavesToBallsLookupBuilder - return LeavesToBallsLookupBuilder(self._setup_actx.context) + return LeavesToBallsLookupBuilder(self._setup_actx) @property @memoize_method @@ -271,7 +270,7 @@ def pick_used_centers(self): ], name="pick_used_centers", silenced_warnings="write_race(center_is_used_write)", - lang_version=MOST_RECENT_LANGUAGE_VERSION) + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "i", 128, inner_tag="l.0", outer_tag="g.0") return knl.executor(self._setup_actx.context) @@ -280,7 +279,7 @@ def pick_used_centers(self): @memoize_method def rotation_classes_builder(self): from boxtree.rotation_classes import RotationClassesBuilder - return RotationClassesBuilder(self._setup_actx.context) + return RotationClassesBuilder(self._setup_actx) def qbx_fmm_geometry_data_code_container( @@ -307,7 +306,8 @@ def make_container( # {{{ geometry data -class TargetInfo(DeviceDataRecord): +@dataclass(frozen=True) +class TargetInfo: """Describes the internal structure of the QBX FMM's list of :attr:`targets`. The list consists of QBX centers, then target points for each target discretization. The starts of the target points for @@ -330,8 +330,14 @@ class TargetInfo(DeviceDataRecord): .. attribute:: ntargets """ + targets: Array + target_discr_starts: Array + ntargets: int + -class CenterToTargetList(DeviceDataRecord): +@dataclass_array_container +@dataclass(frozen=True) +class CenterToTargetList: """A lookup table of targets covered by each QBX disk. Indexed by global number of QBX center, ``lists[start[i]:start[i+1]]`` indicates numbers of the overlapped targets in tree target order. @@ -347,6 +353,9 @@ class CenterToTargetList(DeviceDataRecord): Lists of targets in tree order. Use with :attr:`starts`. """ + starts: Array + lists: Array + class QBXFMMGeometryData(FMMLibRotationDataInterface): """ @@ -586,7 +595,7 @@ def tree(self): refine_weights.finish() - tree, _ = code_getter.build_tree()(actx.queue, + tree, _ = code_getter.build_tree()(actx, particles=flatten( quad_stage2_discr.nodes(), actx, leaf_class=DOFArray ), @@ -603,7 +612,7 @@ def tree(self): tgt_count_2 = actx.to_numpy(actx.np.sum(tree.box_target_counts_nonchild)) assert (tree.ntargets == tgt_count_2), (tree.ntargets, tgt_count_2) - return tree.with_queue(None) + return actx.freeze(tree) # }}} @@ -618,15 +627,15 @@ def traversal(self, merge_close_lists=True): """ actx = self._setup_actx - trav, _ = self.code_getter.build_traversal(actx.queue, self.tree(), + trav, _ = self.code_getter.build_traversal(actx, self.tree(), debug=self.debug, _from_sep_smaller_min_nsources_cumul=( self.lpot_source._from_sep_smaller_min_nsources_cumul)) if merge_close_lists and self.lpot_source._expansions_in_tree_have_extent: - trav = trav.merge_close_lists(actx.queue) + trav = trav.merge_close_lists(actx) - return trav.with_queue(None) + return actx.freeze(trav) @memoize_method def qbx_center_to_target_box(self): @@ -882,9 +891,9 @@ def non_qbx_box_target_lists(self): tree = self.tree() plfilt = self.code_getter.particle_list_filter() - result = plfilt.filter_target_lists_in_tree_order(actx.queue, tree, flags) + result = plfilt.filter_target_lists_in_tree_order(actx, tree, flags) - return result.with_queue(None) + return actx.freeze(result) @memoize_method def build_rotation_classes_lists(self): @@ -892,8 +901,8 @@ def build_rotation_classes_lists(self): trav = self.traversal() tree = self.tree() - result = self.code_getter.rotation_classes_builder(actx.queue, trav, tree) - return result[0].get(queue=actx.queue) + result = self.code_getter.rotation_classes_builder(actx, trav, tree) + return actx.to_numpy(result[0]) @memoize_method def m2l_rotation_lists(self): @@ -940,7 +949,7 @@ def plot(self, draw_circles=False, draw_center_numbers=False, global_flags = actx.to_numpy(self.global_qbx_flags()) - tree = self.tree().get(queue=actx.queue) + tree = actx.to_numpy(self.tree()) from boxtree.visualization import TreePlotter tp = TreePlotter(tree) tp.draw_tree() diff --git a/pytential/qbx/interactions.py b/pytential/qbx/interactions.py index 3818b8ce2..3bc4581c0 100644 --- a/pytential/qbx/interactions.py +++ b/pytential/qbx/interactions.py @@ -26,19 +26,21 @@ import numpy as np import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION -from pytools import memoize_method +from pytools import memoize_method, obj_array from sumpy.e2e import E2EBase from sumpy.e2p import E2PBase from sumpy.p2e import P2EBase +from pytential.array_context import PyOpenCLArrayContext, make_loopy_program from pytential.version import PYTENTIAL_KERNEL_VERSION # {{{ form qbx expansions from points class P2QBXLFromCSR(P2EBase): - default_name = "p2qbxl_from_csr" + @property + def default_name(self): + return "p2qbxl_from_csr" def get_cache_key(self): return (*super().get_cache_key(), PYTENTIAL_KERNEL_VERSION) @@ -70,7 +72,7 @@ def get_kernel(self): ... ]) - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( [ "{[itgt_center]: 0<=itgt_center tgt_icenter = global_qbx_centers[itgt_center] @@ -122,14 +124,13 @@ def get_kernel(self): end """], arguments, - name=self.name, assumptions="ntgt_centers>=1", + name=self.name, + assumptions="ntgt_centers>=1", silenced_warnings="write_race(write_expn*)", fixed_parameters={ "dim": self.dim, "nstrengths": self.strength_count, - "ncoeffs": ncoeffs}, - default_offset=lp.auto, - lang_version=MOST_RECENT_LANGUAGE_VERSION) + "ncoeffs": ncoeffs}) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, "istrength*:unr") @@ -151,15 +152,22 @@ def get_optimized_kernel(self, is_sources_obj_array, is_centers_obj_array): knl = self._allow_redundant_execution_of_knl_scaling(knl) return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): sources = kwargs.pop("sources") qbx_centers = kwargs.pop("qbx_centers") from sumpy.tools import is_obj_array_like - return self.get_cached_kernel_executor( + knl = self.get_cached_kernel_executor( is_sources_obj_array=is_obj_array_like(sources), is_centers_obj_array=is_obj_array_like(qbx_centers), - )(queue, sources=sources, qbx_centers=qbx_centers, **kwargs) + ) + + result = actx.call_loopy( + knl, + sources=sources, + qbx_centers=qbx_centers, **kwargs) + + return result["qbx_expansions"] # }}} @@ -171,7 +179,9 @@ class M2QBXL(E2EBase): list. """ - default_name = "m2qbxl_from_csr" + @property + def default_name(self): + return "m2qbxl_from_csr" def get_cache_key(self): return (*super().get_cache_key(), PYTENTIAL_KERNEL_VERSION) @@ -181,12 +191,11 @@ def get_kernel(self): ncoeff_tgt = len(self.tgt_expansion) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[icenter]: 0<=icenter icontaining_tgt_box = \ @@ -244,16 +253,16 @@ def get_kernel(self): *gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), ... ], - name=self.name, assumptions="ncenters>=1", + name=self.name, + assumptions="ncenters>=1", silenced_warnings="write_race(write_expn*)", fixed_parameters={"dim": self.dim}, - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - return loopy_knl @memoize_method @@ -268,7 +277,7 @@ def get_optimized_kernel(self, is_centers_obj_array): knl = self._allow_redundant_execution_of_knl_scaling(knl) return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. @@ -276,11 +285,17 @@ def __call__(self, queue, **kwargs): from sumpy.tools import is_obj_array_like qbx_centers = kwargs.pop("qbx_centers") - return self.get_cached_kernel_executor( + knl = self.get_cached_kernel_executor( is_centers_obj_array=is_obj_array_like(qbx_centers), - )(queue, centers=centers, - qbx_centers=qbx_centers, - src_rscale=src_rscale, **kwargs) + ) + + result = actx.call_loopy( + knl, + centers=centers, + qbx_centers=qbx_centers, + src_rscale=src_rscale, **kwargs) + + return result["qbx_expansions"] # }}} @@ -288,7 +303,9 @@ def __call__(self, queue, **kwargs): # {{{ translation from a center's box class L2QBXL(E2EBase): - default_name = "l2qbxl" + @property + def default_name(self): + return "l2qbxl" def get_cache_key(self): return (*super().get_cache_key(), PYTENTIAL_KERNEL_VERSION) @@ -298,11 +315,10 @@ def get_kernel(self): ncoeff_tgt = len(self.tgt_expansion) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[icenter]: 0<=icenter isrc_box = qbx_center_to_target_box[icenter] @@ -358,13 +374,12 @@ def get_kernel(self): assumptions="ncenters>=1", silenced_warnings="write_race(write_expn*)", fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim}, - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - return loopy_knl @memoize_method @@ -379,7 +394,7 @@ def get_optimized_kernel(self, is_centers_obj_array): knl = self._allow_redundant_execution_of_knl_scaling(knl) return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. @@ -387,11 +402,17 @@ def __call__(self, queue, **kwargs): from sumpy.tools import is_obj_array_like qbx_centers = kwargs.pop("qbx_centers") - return self.get_cached_kernel_executor( + knl = self.get_cached_kernel_executor( is_centers_obj_array=is_obj_array_like(qbx_centers), - )(queue, centers=centers, - qbx_centers=qbx_centers, - src_rscale=src_rscale, **kwargs) + ) + + result = actx.call_loopy( + knl, + centers=centers, + qbx_centers=qbx_centers, + src_rscale=src_rscale, **kwargs) + + return result["qbx_expansions"] # }}} @@ -399,7 +420,9 @@ def __call__(self, queue, **kwargs): # {{{ evaluation of qbx expansions class QBXL2P(E2PBase): - default_name = "qbx_potential_from_local" + @property + def default_name(self): + return "qbx_potential_from_local" def get_cache_key(self): return (*super().get_cache_key(), PYTENTIAL_KERNEL_VERSION) @@ -408,7 +431,7 @@ def get_kernel(self): ncoeffs = len(self.expansion) loopy_args = [arg.loopy_arg for arg in self.expansion.get_args()] - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( [ "{[iglobal_center]: 0<=iglobal_center TreeCodeContainer: @@ -139,10 +142,6 @@ def __init__(self, array_context: PyOpenCLArrayContext, code_container): self.code_container = code_container self.array_context = array_context - @property - def queue(self): - return self.array_context.queue - def build_tree(self, places, targets_list=(), sources_list=(), use_stage2_discr=False): tb = self.code_container.build_tree() @@ -156,7 +155,7 @@ def build_tree(self, places, targets_list=(), sources_list=(), def find_peer_lists(self, tree): plf = self.code_container.peer_list_finder() - peer_lists, evt = plf(self.queue, tree) + peer_lists, evt = plf(self.array_context, tree) import pyopencl as cl cl.wait_for_events([evt]) @@ -168,6 +167,8 @@ def find_peer_lists(self, tree): # {{{ tree-with-metadata: data structure +@dataclass_array_container +@dataclass(frozen=True) class TreeWithQBXMetadata(Tree): """A subclass of :class:`boxtree.tree.Tree`. Has all of that class's attributes, along with the following: @@ -241,7 +242,27 @@ class TreeWithQBXMetadata(Tree): .. attribute:: qbx_user_center_slice .. attribute:: qbx_user_target_slice """ - pass + + nqbxelements: int + nqbxsources: int + nqbxcenters: int + nqbxtargets: int + + box_to_qbx_source_starts: Array + box_to_qbx_source_lists: Array + + box_to_qbx_center_starts: Array + box_to_qbx_center_lists: Array + + box_to_qbx_target_starts: Array + box_to_qbx_target_lists: Array + + qbx_element_to_source_starts: Array + qbx_element_to_center_starts: Array | None + + qbx_user_source_slice: slice + qbx_user_center_slice: slice + qbx_user_target_slice: slice # }}} @@ -315,7 +336,6 @@ def _make_centers(discr): flatten(tgt.nodes(), actx, leaf_class=DOFArray) for tgt in targets_list] - queue = actx.queue particles = tuple( actx.np.concatenate(dim_coords) for dim_coords in zip(sources, centers, *targets, strict=True)) @@ -346,7 +366,7 @@ def _make_centers(discr): refine_weights.finish() - tree, _ = tree_builder(queue, particles, + tree, _ = tree_builder(actx, particles, max_leaf_refine_weight=MAX_REFINE_WEIGHT, refine_weights=refine_weights) @@ -363,30 +383,32 @@ def _make_centers(discr): flags[particle_slice].fill(1) flags.finish() - box_to_class = ( + box_to_class = actx.thaw( particle_list_filter - .filter_target_lists_in_user_order(queue, tree, flags) - ).with_queue(actx.queue) + .filter_target_lists_in_user_order(actx, tree, flags) + ) if fixup: - box_to_class.target_lists += fixup - particle_classes[class_name + "_starts"] = box_to_class.target_starts - particle_classes[class_name + "_lists"] = box_to_class.target_lists + box_to_class = replace(box_to_class, + target_lists=box_to_class.target_lists + fixup) + + particle_classes[f"{class_name}_starts"] = box_to_class.target_starts + particle_classes[f"{class_name}_lists"] = box_to_class.target_lists del flags del box_to_class # Compute element => source relation - qbx_element_to_source_starts = cast("cl_array.Array", - actx.np.zeros(nelements + 1, tree.particle_id_dtype)) + qbx_element_to_source_starts = actx.np.zeros(nelements + 1, tree.particle_id_dtype) + el_offset = 0 node_nr_base = 0 for group in density_discr.groups: group_element_starts = np.arange( node_nr_base, node_nr_base + group.ndofs, group.nunit_dofs, dtype=tree.particle_id_dtype) - qbx_element_to_source_starts[el_offset:el_offset + group.nelements] = \ - actx.from_numpy(group_element_starts) + qbx_element_to_source_starts[el_offset:el_offset + group.nelements] = ( + actx.from_numpy(group_element_starts)) node_nr_base += group.ndofs el_offset += group.nelements @@ -403,15 +425,12 @@ def _make_centers(discr): # Transfer all tree attributes. tree_attrs = {} - for attr_name in tree.__class__.fields: - try: - tree_attrs[attr_name] = getattr(tree, attr_name) - except AttributeError: - pass + for attr in fields(tree): + tree_attrs[attr.name] = getattr(tree, attr.name) tree_attrs.update(particle_classes) - return TreeWithQBXMetadata( + tree = TreeWithQBXMetadata( qbx_element_to_source_starts=qbx_element_to_source_starts, qbx_element_to_center_starts=qbx_element_to_center_starts, qbx_user_source_slice=qbx_user_source_slice, @@ -421,7 +440,9 @@ def _make_centers(discr): nqbxsources=nsources, nqbxcenters=ncenters, nqbxtargets=ntargets, - **tree_attrs).with_queue(None) + **tree_attrs) + + return actx.freeze(tree) # }}} @@ -436,20 +457,18 @@ class ToHostTransferredGeoDataWrapper(FMMLibRotationDataInterface): def __init__(self, geo_data): self.geo_data = geo_data - @property - def queue(self): - return self.geo_data._setup_actx.queue - def to_numpy(self, ary): return self.geo_data._setup_actx.to_numpy(ary) @memoize_method def tree(self): - return self.geo_data.tree().get(queue=self.queue) + actx = self.geo_data._setup_actx + return actx.to_numpy(self.geo_data.tree()) @memoize_method def traversal(self): - return self.geo_data.traversal().get(queue=self.queue) + actx = self.geo_data._setup_actx + return actx.to_numpy(self.geo_data.traversal()) @property def lpot_source(self): @@ -483,11 +502,13 @@ def qbx_center_to_target_box_source_level(self, source_level): @memoize_method def non_qbx_box_target_lists(self): - return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue) + actx = self.geo_data._setup_actx + return actx.to_numpy(self.geo_data.non_qbx_box_target_lists()) @memoize_method def center_to_tree_targets(self): - return self.geo_data.center_to_tree_targets().get(queue=self.queue) + actx = self.geo_data._setup_actx + return actx.to_numpy(self.geo_data.center_to_tree_targets()) @memoize_method def all_targets(self): diff --git a/pytential/source.py b/pytential/source.py index eb259c825..3f9226a89 100644 --- a/pytential/source.py +++ b/pytential/source.py @@ -36,10 +36,9 @@ flatten, unflatten, ) -from boxtree.timing import TimingResult from meshmode.dof_array import DOFArray from pytools import T, memoize_in -from sumpy.fmm import UnableToCollectTimingData +from sumpy.kernel import Kernel if TYPE_CHECKING: @@ -121,9 +120,8 @@ def p2p(target_kernels: tuple[Kernel, ...], value_dtype = self.real_dtype from sumpy.p2p import P2P - return P2P(actx.context, - target_kernels, exclude_self=False, value_dtypes=value_dtype, - source_kernels=source_kernels) + return P2P(target_kernels, exclude_self=False, value_dtypes=value_dtype, + source_kernels=source_kernels) return p2p(target_kernels, source_kernels) @@ -163,6 +161,10 @@ def evaluate_kernel_arguments( class PointPotentialSource(PotentialSource): """ + .. attribute:: nodes + + An array of shape ``[ambient_dim, ndofs]``. + .. attribute:: ndofs .. automethod:: nodes @@ -223,6 +225,7 @@ def ambient_dim(self) -> int: @override def op_group_features(self, expr: IntG) -> tuple[Hashable, ...]: from pytential.utils import sort_arrays_together + # since IntGs with the same source kernels and densities calculations # for P2E and E2E are the same and only differs in E2P depending on the # target kernel, we group all IntGs with same source kernels and densities. @@ -244,7 +247,7 @@ def cost_model_compute_potential_insn( evaluate: EvaluationMapperBase, calibration_params: dict[str, float], per_box: bool, - ) -> tuple[list[tuple[str, DOFArray]], TimingResult]: + ) -> list[tuple[str, DOFArray]]: raise NotImplementedError def exec_compute_potential_insn( @@ -253,17 +256,8 @@ def exec_compute_potential_insn( insn: ComputePotential, bound_expr: BoundExpression[Any], evaluate: EvaluationMapperBase, - return_timing_data: bool, - ) -> tuple[list[tuple[str, ArrayOrContainerOrScalar]], TimingResult]: - if return_timing_data: - from warnings import warn - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData, - stacklevel=2) - + ) -> list[tuple[str, ArrayOrContainerOrScalar]]: p2p = None - kernel_args = evaluate_kernel_arguments( actx, evaluate, insn.kernel_arguments, flat=False) strengths = [cast("Array", evaluate(density)) for density in insn.densities] @@ -284,7 +278,7 @@ def exec_compute_potential_insn( p2p = self.get_p2p(actx, source_kernels=insn.source_kernels, target_kernels=insn.target_kernels) - _, output_for_each_kernel = p2p(actx.queue, + output_for_each_kernel = p2p(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=self._nodes, strength=strengths, **kernel_args) @@ -296,8 +290,7 @@ def exec_compute_potential_insn( results.append((o.name, result)) - timing_data: TimingResult = TimingResult() - return results, timing_data + return results # }}} @@ -331,6 +324,11 @@ class LayerPotentialSourceBase(PotentialSource, ABC): Inherits from :class:`PotentialSource`. .. attribute:: density_discr + + .. attribute:: ambient_dim + .. attribute:: dim + .. attribute:: real_dtype + .. attribute:: complex_dtype """ def __init__(self, density_discr: Discretization): diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index fc1931ece..ff5c3d85e 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -117,8 +117,6 @@ def __init__(self, bound_expr, actx: PyOpenCLArrayContext, context=None, if not isinstance(actx, PyOpenCLArrayContext): raise NotImplementedError("evaluation with non-PyOpenCL array context") - self.queue = actx.queue - # {{{ map_XXX def _map_minmax(self, func, inherited_func, expr): @@ -374,28 +372,13 @@ def map_call(self, expr): class EvaluationMapper(EvaluationMapperBase): - def __init__(self, bound_expr, actx, context=None, - timing_data=None): - EvaluationMapperBase.__init__(self, bound_expr, actx, context) - self.timing_data = timing_data + def __init__(self, bound_expr, actx, context=None): + super().__init__(bound_expr, actx, context) def exec_compute_potential_insn( self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate): source = bound_expr.places.get_geometry(insn.source.geometry) - - return_timing_data = self.timing_data is not None - - result, timing_data = ( - source.exec_compute_potential_insn( - actx, insn, bound_expr, evaluate, return_timing_data)) - - if return_timing_data: - # The compiler ensures this. - assert insn not in self.timing_data - - self.timing_data[insn] = timing_data - - return result + return source.exec_compute_potential_insn(actx, insn, bound_expr, evaluate) # }}} @@ -449,17 +432,13 @@ def exec_compute_potential_insn( else: calibration_params = self.kernel_to_calibration_params[knls] - result, (cost_model_result, metadata) = \ - source.cost_model_compute_potential_insn( - actx, insn, bound_expr, evaluate, calibration_params, - self.per_box) + result = source.cost_model_compute_potential_insn( + actx, insn, bound_expr, evaluate, calibration_params, + self.per_box) # The compiler ensures this. assert insn not in self.modeled_cost - self.modeled_cost[insn] = cost_model_result - self.metadata[insn] = metadata - return result def get_modeled_cost(self): @@ -791,10 +770,10 @@ def cost_per_box(self, calibration_params, **kwargs): :arg calibration_params: either a :class:`dict` returned by `estimate_kernel_specific_calibration_params`, or a :class:`str` "constant_one". - :return: a :class:`dict` mapping from statement to per-box cost. Each - per-box cost is represented by a :class:`numpy.ndarray` or - :class:`pyopencl.array.Array` of shape (nboxes,), where the ith entry - represents the cost of all stages for box i. + :return: a :class:`dict` mapping from instruction to per-box cost. Each + per-box cost is represented by an array of shape ``(nboxes,)``, where + the :math:`i`-th entry represents the cost of all stages for box + :math:`i`. """ array_context = _find_array_context_from_args_in_context(kwargs) @@ -815,7 +794,7 @@ def scipy_op( target from *places* is used. :returns: An object that (mostly) satisfies the :class:`scipy.sparse.linalg.LinearOperator` protocol, except for - accepting and returning :class:`pyopencl.array.Array` arrays. + arrays supported by *actx*. """ if isinstance(self.code.result, np.ndarray): @@ -850,14 +829,12 @@ def scipy_op( return MatVecOp(self, actx, arg_name, dtype, total_dofs, discrs, starts_and_ends, extra_args) - def eval(self, context=None, timing_data=None, + def eval(self, + context: dict[str, Any] | None = None, array_context: PyOpenCLArrayContext | None = None): """Evaluate the expression in *self*, using the input variables given in the dictionary *context*. - :arg timing_data: A dictionary into which timing - data will be inserted during evaluation. - (experimental) :arg array_context: only needs to be supplied if no instances of :class:`~meshmode.dof_array.DOFArray` with a :class:`~arraycontext.PyOpenCLArrayContext` @@ -889,8 +866,7 @@ def eval(self, context=None, timing_data=None, return value - exec_mapper = EvaluationMapper( - self, array_context, context, timing_data=timing_data) + exec_mapper = EvaluationMapper(self, array_context, context) return execute(self.code, exec_mapper) @overload diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index 9f62c5098..f647511a9 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -489,7 +489,7 @@ def map_int_g(self, expr): kernel.get_base_kernel(), (expr.target_kernel,)) from sumpy.qbx import LayerPotentialMatrixGenerator - mat_gen = LayerPotentialMatrixGenerator(actx.context, + mat_gen = LayerPotentialMatrixGenerator( expansion=local_expn, source_kernels=(kernel,), target_kernels=(expr.target_kernel,)) @@ -500,7 +500,7 @@ def map_int_g(self, expr): kernel_args = _get_layer_potential_args( actx, self.places, expr, context=self.context) - _, (mat,) = mat_gen(actx.queue, + mat, = mat_gen(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray), centers=flatten(centers, actx, leaf_class=DOFArray), @@ -577,7 +577,7 @@ def map_int_g(self, expr): base_kernel = kernel.get_base_kernel() from sumpy.p2p import P2PMatrixGenerator - mat_gen = P2PMatrixGenerator(actx.context, + mat_gen = P2PMatrixGenerator( source_kernels=(base_kernel,), target_kernels=(target_base_kernel,), exclude_self=self.exclude_self) @@ -603,7 +603,7 @@ def map_int_g(self, expr): # }}} - _, (mat,) = mat_gen(actx.queue, + mat, = mat_gen(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray), **kernel_args) @@ -701,8 +701,10 @@ def map_int_g(self, expr): kernel.get_base_kernel(), (expr.target_kernel,)) from sumpy.qbx import LayerPotentialMatrixSubsetGenerator - mat_gen = LayerPotentialMatrixSubsetGenerator(actx.context, local_expn, - source_kernels=(kernel,), target_kernels=(expr.target_kernel,)) + mat_gen = LayerPotentialMatrixSubsetGenerator( + local_expn, + source_kernels=(kernel,), + target_kernels=(expr.target_kernel,)) # }}} @@ -711,7 +713,7 @@ def map_int_g(self, expr): kernel_args = _get_layer_potential_args( actx, self.places, expr, context=self.context) - _, (mat,) = mat_gen(actx.queue, + mat, = mat_gen(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray), centers=flatten(centers, actx, leaf_class=DOFArray), @@ -798,7 +800,7 @@ def map_int_g(self, expr): base_kernel = kernel.get_base_kernel() from sumpy.p2p import P2PMatrixSubsetGenerator - mat_gen = P2PMatrixSubsetGenerator(actx.context, + mat_gen = P2PMatrixSubsetGenerator( source_kernels=(base_kernel,), target_kernels=(target_base_kernel,), exclude_self=self.exclude_self) @@ -824,7 +826,7 @@ def map_int_g(self, expr): # }}} - _, (mat,) = mat_gen(actx.queue, + mat, = mat_gen(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray), tgtindices=tgtindices, diff --git a/pytential/symbolic/primitives.py b/pytential/symbolic/primitives.py index ead7cfd94..070e616b8 100644 --- a/pytential/symbolic/primitives.py +++ b/pytential/symbolic/primitives.py @@ -124,14 +124,14 @@ This can be converted to an object array by calling: :meth:`pymbolic.geometric_algebra.MultiVector.as_vector`. -:class:`pyopencl.array.Array` and :class:`meshmode.dof_array.DOFArray` instances -hold per-node degrees of freedom (and only those). Such instances do *not* occur -on the symbolic side of :mod:`pytential` at all. They're only visible either as -bound inputs (see :func:`pytential.bind`) or outputs of evaluation. Which one is -used depends on the meaning of the data being represented. If the data is -associated with a :class:`~meshmode.discretization.Discretization`, then -:class:`~meshmode.dof_array.DOFArray` is used and otherwise -:class:`~pyopencl.array.Array` is used. +:class:`meshmode.dof_array.DOFArray` instances hold per-node degrees of freedom +(and only those). Such instances do *not* occur on the symbolic side of +:mod:`pytential` at all. They're only visible either as bound inputs (see +:func:`pytential.bind`) or outputs of evaluation. Which one is used depends on +the meaning of the data being represented. If the data is associated with a +:class:`~meshmode.discretization.Discretization`, then +:class:`~meshmode.dof_array.DOFArray` is used and otherwise a base array +type for the underlying :class:`~arraycontext.ArrayContext` is used. .. autoclass:: ExpressionNode :show-inheritance: diff --git a/pytential/unregularized.py b/pytential/unregularized.py index 2af8b7cdd..ed678bdde 100644 --- a/pytential/unregularized.py +++ b/pytential/unregularized.py @@ -27,17 +27,17 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Literal import numpy as np import loopy as lp -from arraycontext import PyOpenCLArrayContext, flatten, unflatten -from boxtree.tools import DeviceDataRecord +from arraycontext import Array, PyOpenCLArrayContext, flatten, unflatten from loopy.version import MOST_RECENT_LANGUAGE_VERSION from meshmode.dof_array import DOFArray from pytools import memoize_method +from pytential.array_context import dataclass_array_container from pytential.source import LayerPotentialSourceBase @@ -117,16 +117,7 @@ def copy( debug=debug if debug is not None else self.debug) def exec_compute_potential_insn(self, actx: PyOpenCLArrayContext, - insn, bound_expr, evaluate, return_timing_data): - if return_timing_data: - from warnings import warn - - from pytential.source import UnableToCollectTimingData - warn( - "Timing data collection not supported.", - category=UnableToCollectTimingData, - stacklevel=2) - + insn, bound_expr, evaluate): from pytools import obj_array def evaluate_wrapper(expr): @@ -158,8 +149,8 @@ def preprocess_optemplate(self, name, discretizations, expr): from pytential.symbolic.mappers import UnregularizedPreprocessor return UnregularizedPreprocessor(name, discretizations)(expr) - def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext, - insn, bound_expr, evaluate): + def exec_compute_potential_insn_direct(self, + actx: PyOpenCLArrayContext, insn, bound_expr, evaluate): kernel_args = {} for arg_name, arg_expr in insn.kernel_arguments.items(): @@ -184,7 +175,7 @@ def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext, p2p = self.get_p2p(actx, source_kernels=insn.source_kernels, target_kernels=insn.target_kernels) - _, output_for_each_kernel = p2p(actx.queue, + output_for_each_kernel = p2p(actx, targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray), sources=flatten( self.density_discr.nodes(), actx, leaf_class=DOFArray @@ -199,8 +190,7 @@ def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext, results.append((o.name, result)) - timing_data: dict[str, Any] = {} - return results, timing_data + return results # {{{ fmm-based execution @@ -218,7 +208,7 @@ def _tree_indep_data_for_wrangler(self, fmm_kernel, target_kernels, from sumpy.fmm import SumpyTreeIndependentDataForWrangler return SumpyTreeIndependentDataForWrangler( - self.cl_context, + self._setup_actx, fmm_mpole_factory, fmm_local_factory, target_kernels=target_kernels, source_kernels=source_kernels) @@ -290,8 +280,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, # }}} from boxtree.fmm import drive_fmm - all_potentials_on_every_tgt = drive_fmm( - wrangler, flat_strengths, timing_data=None) + all_potentials_on_every_tgt = drive_fmm(actx, wrangler, flat_strengths) # {{{ postprocess fmm @@ -314,8 +303,7 @@ def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext, # }}} - timing_data: dict[str, Any] = {} - return results, timing_data + return results # }}} @@ -330,10 +318,6 @@ class _FMMGeometryDataCodeContainer: ambient_dim: int debug: bool - @property - def cl_context(self): - return self.array_context.context - @memoize_method def copy_targets_kernel(self): knl = lp.make_kernel( @@ -354,22 +338,24 @@ def copy_targets_kernel(self): knl = lp.tag_array_axes(knl, "targets", "stride:auto, stride:1") knl = lp.tag_inames(knl, {"dim": "ilp"}) - return knl.executor(self.cl_context) + return knl.executor(self.array_context.context) @property @memoize_method def build_tree(self): from boxtree import TreeBuilder - return TreeBuilder(self.cl_context) + return TreeBuilder(self.array_context) @property @memoize_method def build_traversal(self): from boxtree.traversal import FMMTraversalBuilder - return FMMTraversalBuilder(self.cl_context) + return FMMTraversalBuilder(self.array_context) -class _TargetInfo(DeviceDataRecord): +@dataclass_array_container +@dataclass(frozen=True) +class _TargetInfo: """ .. attribute:: targets @@ -382,6 +368,10 @@ class _TargetInfo(DeviceDataRecord): .. attribute:: ntargets """ + targets: Array + target_discr_starts: Array + ntargets: int + @dataclass(frozen=True) class _FMMGeometryData: @@ -390,10 +380,6 @@ class _FMMGeometryData: target_discrs: Sequence[TargetOrDiscretization] debug: bool - @property - def cl_context(self): - return self.code_getter.cl_context - @property def array_context(self): return self.code_getter.array_context @@ -410,9 +396,9 @@ def ambient_dim(self): def traversal(self): actx = self.array_context trav, _ = self.code_getter.build_traversal( - actx.queue, self.tree(), debug=self.debug) + actx, self.tree(), debug=self.debug) - return trav.with_queue(None) + return actx.freeze(trav) @memoize_method def tree(self): @@ -437,7 +423,7 @@ def tree(self): MAX_LEAF_REFINE_WEIGHT = 32 - tree, _ = code_getter.build_tree(actx.queue, + tree, _ = code_getter.build_tree(actx, particles=flatten( lpot_src.density_discr.nodes(), actx, leaf_class=DOFArray ), @@ -479,9 +465,9 @@ def target_info(self): ) return _TargetInfo( - targets=targets, + targets=actx.freeze(targets), target_discr_starts=target_discr_starts, - ntargets=ntargets).with_queue(None) + ntargets=ntargets) # }}} diff --git a/requirements.txt b/requirements.txt index e205325f5..1b493378d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,9 +8,9 @@ git+https://github.com/inducer/modepy.git#egg=modepy git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/loopy.git#egg=loopy -git+https://github.com/inducer/boxtree.git#egg=boxtree +git+https://github.com/alexfikl/boxtree.git@towards-array-context#egg=boxtree git+https://github.com/inducer/arraycontext.git#egg=arraycontext git+https://github.com/inducer/gmsh_interop/#egg=gmsh_interop git+https://github.com/inducer/meshmode.git#egg=meshmode -git+https://github.com/inducer/sumpy.git#egg=sumpy +git+https://github.com/alexfikl/sumpy.git@towards-array-context#egg=sumpy git+https://github.com/inducer/pyfmmlib.git#egg=pyfmmlib diff --git a/test/test_beltrami.py b/test/test_beltrami.py index 0ca646c3a..afc6f3c00 100644 --- a/test/test_beltrami.py +++ b/test/test_beltrami.py @@ -25,6 +25,7 @@ import logging from dataclasses import dataclass +from typing import TYPE_CHECKING import extra_int_eq_data as eid import pytest @@ -39,12 +40,6 @@ LaplaceBeltramiOperator, YukawaBeltramiOperator, ) - - -logger = logging.getLogger(__name__) - -from typing import TYPE_CHECKING - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 @@ -52,6 +47,7 @@ import numpy as np +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 51b87339e..05c44459c 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -45,13 +45,10 @@ _PythonQBXCostModel, make_pde_aware_translation_cost_model, ) - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) @@ -66,7 +63,6 @@ def test_compare_cl_and_py_cost_model(actx_factory): qbx_order = fmm_order actx = actx_factory() - queue = actx.queue # {{{ Construct geometry @@ -118,7 +114,7 @@ def test_compare_cl_and_py_cost_model(actx_factory): constant_one_params["p_fmm_lev%d" % ilevel] = 10 cl_cost_factors = cl_cost_model.qbx_cost_factors_for_kernels_from_model( - queue, tree.nlevels, xlat_cost, constant_one_params + actx, tree.nlevels, xlat_cost, constant_one_params ) python_cost_factors = python_cost_model.qbx_cost_factors_for_kernels_from_model( @@ -131,33 +127,33 @@ def test_compare_cl_and_py_cost_model(actx_factory): cl_ndirect_sources_per_target_box = ( cl_cost_model.get_ndirect_sources_per_target_box( - queue, geo_data_dev.traversal() + actx, geo_data_dev.traversal() ) ) import time - queue.finish() + actx.queue.finish() start_time = time.time() cl_p2qbxl = cl_cost_model.process_form_qbxl( - queue, geo_data_dev, cl_cost_factors["p2qbxl_cost"], + actx, geo_data_dev, cl_cost_factors["p2qbxl_cost"], cl_ndirect_sources_per_target_box ) - queue.finish() + actx.queue.finish() logger.info("OpenCL time for process_form_qbxl: %s", time.time() - start_time) python_ndirect_sources_per_target_box = ( python_cost_model.get_ndirect_sources_per_target_box( - queue, geo_data.traversal() + actx, geo_data.traversal() ) ) start_time = time.time() python_p2qbxl = python_cost_model.process_form_qbxl( - queue, geo_data, python_cost_factors["p2qbxl_cost"], + actx, geo_data, python_cost_factors["p2qbxl_cost"], python_ndirect_sources_per_target_box ) @@ -169,20 +165,20 @@ def test_compare_cl_and_py_cost_model(actx_factory): # {{{ Test process_m2qbxl - queue.finish() + actx.queue.finish() start_time = time.time() cl_m2qbxl = cl_cost_model.process_m2qbxl( - queue, geo_data_dev, cl_cost_factors["m2qbxl_cost"] + actx, geo_data_dev, cl_cost_factors["m2qbxl_cost"] ) - queue.finish() + actx.queue.finish() logger.info("OpenCL time for process_m2qbxl: %s", time.time() - start_time) start_time = time.time() python_m2qbxl = python_cost_model.process_m2qbxl( - queue, geo_data, python_cost_factors["m2qbxl_cost"] + actx, geo_data, python_cost_factors["m2qbxl_cost"] ) logger.info("Python time for process_m2qbxl: %s", time.time() - start_time) @@ -193,20 +189,20 @@ def test_compare_cl_and_py_cost_model(actx_factory): # {{{ Test process_l2qbxl - queue.finish() + actx.queue.finish() start_time = time.time() cl_l2qbxl = cl_cost_model.process_l2qbxl( - queue, geo_data_dev, cl_cost_factors["l2qbxl_cost"] + actx, geo_data_dev, cl_cost_factors["l2qbxl_cost"] ) - queue.finish() + actx.queue.finish() logger.info("OpenCL time for process_l2qbxl: %s", time.time() - start_time) start_time = time.time() python_l2qbxl = python_cost_model.process_l2qbxl( - queue, geo_data, python_cost_factors["l2qbxl_cost"] + actx, geo_data, python_cost_factors["l2qbxl_cost"] ) logger.info("Python time for process_l2qbxl: %s", time.time() - start_time) @@ -217,20 +213,20 @@ def test_compare_cl_and_py_cost_model(actx_factory): # {{{ Test process_eval_qbxl - queue.finish() + actx.queue.finish() start_time = time.time() cl_eval_qbxl = cl_cost_model.process_eval_qbxl( - queue, geo_data_dev, cl_cost_factors["qbxl2p_cost"] + actx, geo_data_dev, cl_cost_factors["qbxl2p_cost"] ) - queue.finish() + actx.queue.finish() logger.info("OpenCL time for process_eval_qbxl: %s", time.time() - start_time) start_time = time.time() python_eval_qbxl = python_cost_model.process_eval_qbxl( - queue, geo_data, python_cost_factors["qbxl2p_cost"] + actx, geo_data, python_cost_factors["qbxl2p_cost"] ) logger.info("Python time for process_eval_qbxl: %s", time.time() - start_time) @@ -241,15 +237,15 @@ def test_compare_cl_and_py_cost_model(actx_factory): # {{{ Test eval_target_specific_qbxl - queue.finish() + actx.queue.finish() start_time = time.time() cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl( - queue, geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"], + actx, geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"], cl_ndirect_sources_per_target_box ) - queue.finish() + actx.queue.finish() logger.info("OpenCL time for eval_target_specific_qbxl: %s", time.time() - start_time) @@ -257,7 +253,7 @@ def test_compare_cl_and_py_cost_model(actx_factory): python_eval_target_specific_qbxl = \ python_cost_model.process_eval_target_specific_qbxl( - queue, geo_data, python_cost_factors["p2p_tsqbx_cost"], + actx, geo_data, python_cost_factors["p2p_tsqbx_cost"], python_ndirect_sources_per_target_box ) @@ -340,7 +336,9 @@ def test_timing_data_gathering(ctx_factory): pytest.importorskip("pyfmmlib") import pyopencl as cl - from meshmode.array_context import PyOpenCLArrayContext + + from pytential.array_context import PyOpenCLArrayContext + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) @@ -358,11 +356,7 @@ def test_timing_data_gathering(ctx_factory): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) op_S = bind(places, sym_op_S) - - timing_data = {} - op_S.eval({"sigma": sigma}, timing_data=timing_data, array_context=actx) - assert timing_data - logging.info(timing_data) + op_S.eval({"sigma": sigma}, array_context=actx) # }}} @@ -395,11 +389,9 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box): op_S = bind(places, sym_op_S) if per_box: - cost_S, _ = op_S.cost_per_box("constant_one", sigma=sigma) + _cost_S, _ = op_S.cost_per_box("constant_one", sigma=sigma) else: - cost_S, _ = op_S.cost_per_stage("constant_one", sigma=sigma) - - assert len(cost_S) == 1 + _cost_S, _ = op_S.cost_per_stage("constant_one", sigma=sigma) sym_op_S_plus_D = ( sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) @@ -407,15 +399,9 @@ def test_cost_model(actx_factory, dim, use_target_specific_qbx, per_box): op_S_plus_D = bind(places, sym_op_S_plus_D) if per_box: - cost_S_plus_D, _ = op_S_plus_D.cost_per_box( - "constant_one", sigma=sigma - ) + _cost_S_plus_D, _ = op_S_plus_D.cost_per_box("constant_one", sigma=sigma) else: - cost_S_plus_D, _ = op_S_plus_D.cost_per_stage( - "constant_one", sigma=sigma - ) - - assert len(cost_S_plus_D) == 2 + _cost_S_plus_D, _ = op_S_plus_D.cost_per_stage("constant_one", sigma=sigma) # }}} @@ -447,7 +433,6 @@ def test_cost_model_metadata_gathering(actx_factory): _, metadata = op_S.cost_per_stage( "constant_one", sigma=sigma, k=k, return_metadata=True ) - metadata, = metadata.values() geo_data = lpot_source.qbx_fmm_geometry_data( places, @@ -456,6 +441,9 @@ def test_cost_model_metadata_gathering(actx_factory): tree = geo_data.tree() + if not metadata: + return + assert metadata["p_qbx"] == QBX_ORDER assert metadata["nlevels"] == tree.nlevels assert metadata["nsources"] == tree.nsources @@ -474,8 +462,7 @@ def test_cost_model_metadata_gathering(actx_factory): class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): - def __init__(self, tree_indep, queue, geo_data, - use_target_specific_qbx): + def __init__(self, tree_indep, geo_data, use_target_specific_qbx): from pytential.qbx.utils import ToHostTransferredGeoDataWrapper geo_data = ToHostTransferredGeoDataWrapper(geo_data) @@ -499,7 +486,7 @@ def output_zeros(self): non_qbx_box_target_lists = self.geo_data.non_qbx_box_target_lists() return np.zeros(non_qbx_box_target_lists.nfiltered_targets) - def full_output_zeros(self, template_ary): + def full_output_zeros(self, actx): from pytools import obj_array return obj_array.new_1d([np.zeros(self.tree.ntargets)]) @@ -510,13 +497,12 @@ def reorder_potentials(self, potentials): raise NotImplementedError("reorder_potentials should not " "be called on a QBXExpansionWrangler") - def form_global_qbx_locals(self, src_weight_vecs): + def form_global_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs local_exps = self.qbx_local_expansion_zeros() - ops = 0 if self.using_tsqbx: - return local_exps, self.timing_future(ops) + return local_exps global_qbx_centers = self.geo_data.global_qbx_centers() qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() @@ -530,16 +516,14 @@ def form_global_qbx_locals(self, src_weight_vecs): src_sum = 0 for src_ibox in self.trav.neighbor_source_boxes_lists[start:end]: src_pslice = self._get_source_slice(src_ibox) - ops += src_pslice.stop - src_pslice.start src_sum += np.sum(src_weights[src_pslice]) local_exps[tgt_icenter] = src_sum - return local_exps, self.timing_future(ops) + return local_exps - def translate_box_multipoles_to_qbx_local(self, multipole_exps): + def translate_box_multipoles_to_qbx_local(self, actx, multipole_exps): local_exps = self.qbx_local_expansion_zeros() - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() @@ -559,13 +543,11 @@ def translate_box_multipoles_to_qbx_local(self, multipole_exps): for src_ibox in ssn.lists[start:stop]: local_exps[tgt_icenter] += multipole_exps[src_ibox] - ops += 1 - return local_exps, self.timing_future(ops) + return local_exps - def translate_box_local_to_qbx_local(self, local_exps): + def translate_box_local_to_qbx_local(self, actx, local_exps): qbx_expansions = self.qbx_local_expansion_zeros() - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() @@ -574,13 +556,11 @@ def translate_box_local_to_qbx_local(self, local_exps): isrc_box = qbx_center_to_target_box[tgt_icenter] src_ibox = self.trav.target_boxes[isrc_box] qbx_expansions[tgt_icenter] += local_exps[src_ibox] - ops += 1 - return qbx_expansions, self.timing_future(ops) + return qbx_expansions - def eval_qbx_expansions(self, qbx_expansions): + def eval_qbx_expansions(self, actx, qbx_expansions): output = self.full_output_zeros(qbx_expansions) - ops = 0 global_qbx_centers = self.geo_data.global_qbx_centers() center_to_tree_targets = self.geo_data.center_to_tree_targets() @@ -591,17 +571,15 @@ def eval_qbx_expansions(self, qbx_expansions): for icenter_tgt in range(start, end): center_itgt = center_to_tree_targets.lists[icenter_tgt] output[0][center_itgt] += qbx_expansions[src_icenter] - ops += 1 - return output, self.timing_future(ops) + return output - def eval_target_specific_qbx_locals(self, src_weight_vecs): + def eval_target_specific_qbx_locals(self, actx, src_weight_vecs): src_weights, = src_weight_vecs pot = self.full_output_zeros(src_weights) - ops = 0 if not self.using_tsqbx: - return pot, self.timing_future(ops) + return pot global_qbx_centers = self.geo_data.global_qbx_centers() center_to_tree_targets = self.geo_data.center_to_tree_targets() @@ -642,9 +620,7 @@ def eval_target_specific_qbx_locals(self, src_weight_vecs): ctr_itgt = center_to_tree_targets.lists[ictr_tgt] pot[0][ctr_itgt] = src_sum - ops += (ictr_tgt_end - ictr_tgt_start) * nsrcs - - return pot, self.timing_future(ops) + return pot # }}} @@ -694,7 +670,6 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, use_target_specific_qbx): """Check that computed cost matches that of a constant-one FMM.""" actx = actx_factory() - queue = actx.queue cost_model = QBXCostModel( translation_cost_model_factory=OpCountingTranslationCostModel) @@ -710,7 +685,8 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, from pytential.target import PointsTarget ntargets = 10 ** 3 targets = PointsTarget( - make_uniform_particle_array(queue, ntargets, dim, np.float64)) + make_uniform_particle_array(actx, ntargets, dim, np.float64)) + target_discrs_and_qbx_sides = ((targets, 0),) qbx_forced_limit = None places = GeometryCollection((lpot_source, targets)) @@ -735,7 +711,6 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, sigma = get_density(actx, density_discr) modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma) - modeled_time, = modeled_time.values() # Run FMM with ConstantOneWrangler. This can't be done with pytential's # high-level interface, so call the FMM driver directly. @@ -746,7 +721,7 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, wrangler = ConstantOneQBXExpansionWrangler( TreeIndependentDataForWrangler(), - queue, geo_data, use_target_specific_qbx) + geo_data, use_target_specific_qbx) quad_stage2_density_discr = places.get_discretization( source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2) @@ -754,12 +729,16 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, src_weights = np.ones(ndofs) timing_data = {} - potential = drive_fmm(wrangler, (src_weights,), timing_data - )[0][geo_data.ncenters:] + potential = drive_fmm(actx, wrangler, (src_weights,))[0][geo_data.ncenters:] # Check constant one wrangler for correctness. assert np.all(potential == ndofs) + if not timing_data: + return + + modeled_time, = modeled_time.values() + # Check that the cost model matches the timing data returned by the # constant one wrangler. mismatches = [] @@ -783,7 +762,7 @@ def test_cost_model_correctness(actx_factory, dim, off_surface, logging.info(per_box_cost) per_box_cost, = per_box_cost.values() - total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost) + total_aggregate_cost = cost_model.aggregate_over_boxes(actx, per_box_cost) assert total_cost == ( total_aggregate_cost + modeled_time["coarsen_multipoles"] @@ -825,15 +804,12 @@ def level_to_order_constant(kernel, kernel_args, tree, level): cost_constant, metadata = bind(places, sym_op).cost_per_stage( "constant_one", sigma=sigma) - cost_constant, = cost_constant.values() - metadata, = metadata.values() - # }}} # {{{ varying level to order def level_to_order_varying(kernel, kernel_args, tree, level): - return metadata["nlevels"] - level + return tree.nlevels - level lpot_source = get_lpot_source(actx, 2).copy( cost_model=QBXCostModel(), @@ -847,10 +823,15 @@ def level_to_order_varying(kernel, kernel_args, tree, level): cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage( "constant_one", sigma=sigma) - cost_varying, = cost_varying.values() - # }}} + if not metadata: + return + + cost_constant, = cost_constant.values() + metadata, = metadata.values() + cost_varying, = cost_varying.values() + assert sum(cost_varying.values()) > sum(cost_constant.values()) # }}} diff --git a/test/test_global_qbx.py b/test/test_global_qbx.py index 61cf292e7..74c505930 100644 --- a/test/test_global_qbx.py +++ b/test/test_global_qbx.py @@ -43,13 +43,10 @@ from pytential import GeometryCollection, bind, sym from pytential.qbx import QBXLayerPotentialSource - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) @@ -338,8 +335,7 @@ def test_target_association(actx_factory, curve_name, curve_f, nelements, # {{{ generate targets - from pyopencl.clrandom import PhiloxGenerator - rng = PhiloxGenerator(actx.context, seed=RNG_SEED) + rng = np.random.default_rng(RNG_SEED) ambient_dim = places.ambient_dim dd = places.auto_source.to_stage1() @@ -350,11 +346,7 @@ def test_target_association(actx_factory, curve_name, curve_f, nelements, actx)).reshape(ambient_dim, -1) density_discr = places.get_discretization(dd.geometry) - - noise = actx.to_numpy( - rng.uniform(actx.queue, density_discr.ndofs, - dtype=np.float64, a=0.01, b=1.0) - ) + noise = rng.uniform(0.01, 1.0, density_discr.ndofs) tunnel_radius = actx.to_numpy(flatten( bind(places, sym._close_target_tunnel_radii(ambient_dim, dofdesc=dd))(actx), @@ -409,14 +401,14 @@ def targets_from_sources(sign, dist, dim=2): ) code_container = target_association_code_container(actx) - target_assoc = ( + target_assoc = actx.to_numpy( associate_targets_to_qbx_centers( places, places.auto_source, code_container.get_wrangler(actx), target_discrs, target_association_tolerance=1e-10) - ).get(queue=actx.queue) + ) expansion_radii = actx.to_numpy(flatten( bind(places, sym.expansion_radii(ambient_dim, diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index 368170e5c..971b9bf82 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -36,13 +36,10 @@ from sumpy.visualization import FieldPlotter from pytential import GeometryCollection, bind, norm, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_layer_pot_eigenvalues.py b/test/test_layer_pot_eigenvalues.py index aa8d0e270..6ad55a38e 100644 --- a/test/test_layer_pot_eigenvalues.py +++ b/test/test_layer_pot_eigenvalues.py @@ -35,13 +35,10 @@ from meshmode.array_context import PytestPyOpenCLArrayContextFactory from pytential import GeometryCollection, bind, norm, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_linalg_skeletonization.py b/test/test_linalg_skeletonization.py index 675b69362..0f9b36396 100644 --- a/test/test_linalg_skeletonization.py +++ b/test/test_linalg_skeletonization.py @@ -26,6 +26,7 @@ import logging from dataclasses import replace from functools import partial +from typing import TYPE_CHECKING import extra_matrix_data as extra import numpy as np @@ -38,12 +39,6 @@ from meshmode.mesh.generation import NArmedStarfish, ellipse from pytential import GeometryCollection, sym - - -logger = logging.getLogger(__name__) - -from typing import TYPE_CHECKING - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 @@ -51,6 +46,7 @@ from collections.abc import Sequence +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_linalg_utils.py b/test/test_linalg_utils.py index 668a43e83..5ba298279 100644 --- a/test/test_linalg_utils.py +++ b/test/test_linalg_utils.py @@ -33,12 +33,10 @@ from meshmode import _acf # noqa: F401 from meshmode.array_context import PytestPyOpenCLArrayContextFactory - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_matrix.py b/test/test_matrix.py index 97f27d38c..606cd5794 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -35,19 +35,16 @@ import pytest from arraycontext import flatten, pytest_generate_tests_for_array_contexts, unflatten -from meshmode import _acf # noqa: F401 # noqa: F401 # noqa: F401 +from meshmode import _acf # noqa: F401 from meshmode.array_context import PytestPyOpenCLArrayContextFactory from meshmode.mesh.generation import NArmedStarfish, ellipse from pytools import obj_array from pytential import GeometryCollection, bind, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_maxwell.py b/test/test_maxwell.py index 5888cddef..66243559c 100644 --- a/test/test_maxwell.py +++ b/test/test_maxwell.py @@ -34,18 +34,12 @@ from meshmode.array_context import PytestPyOpenCLArrayContextFactory from meshmode.mesh.processing import find_bounding_box from sumpy.point_calculus import CalculusPatch, frequency_domain_maxwell -from sumpy.tools import vector_from_device -from sumpy.visualization import make_field_plotter_from_bbox from pytential import bind, norm, sym from pytential.target import PointsTarget logger = logging.getLogger(__name__) - -from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 - - pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) @@ -247,13 +241,13 @@ def test_pec_mfie_extinction(actx_factory, case, calc_patch = CalculusPatch(np.array([-3, 0, 0]), h=0.01) calc_patch_tgt = PointsTarget(actx.from_numpy(calc_patch.points)) - import pyopencl.clrandom as clrandom - rng = clrandom.PhiloxGenerator(actx.context, seed=12) - from pytools import obj_array + rng = np.random.default_rng(12) src_j = obj_array.new_1d([ - rng.normal(actx.queue, (test_source.ndofs), dtype=np.float64) + actx.from_numpy( + rng.standard_normal(test_source.ndofs, dtype=np.float64) + ) for _ in range(3)]) def eval_inc_field_at(places, source=None, target=None): @@ -324,8 +318,9 @@ def eval_inc_field_at(places, source=None, target=None): }) if visualize: - qbx_tgt_tol = qbx.copy(target_association_tolerance=0.2) + from sumpy.visualization import make_field_plotter_from_bbox + qbx_tgt_tol = qbx.copy(target_association_tolerance=0.2) fplot = make_field_plotter_from_bbox( find_bounding_box(scat_discr.mesh), h=(0.05, 0.05, 0.3), extend_factor=0.3) @@ -346,8 +341,9 @@ def eval_inc_field_at(places, source=None, target=None): bind(places, sym.h_max(qbx.ambient_dim))(actx) ) - pde_test_inc = EHField(vector_from_device(actx.queue, - eval_inc_field_at(places, target="patch_target"))) + pde_test_inc = EHField( + actx.from_numpy(eval_inc_field_at(places, target="patch_target")) + ) source_maxwell_resids = [ calc_patch.norm(x, np.inf) / calc_patch.norm(pde_test_inc.e, np.inf) @@ -404,8 +400,9 @@ def eval_repr_at(tgt, source=None, target=None): places, sym_repr, auto_where=(source, target) # noqa: B023 )(actx, jt=jt, rho=rho, **knl_kwargs) # noqa: B023 - pde_test_repr = EHField(vector_from_device(actx.queue, - eval_repr_at(places, target="patch_target"))) + pde_test_repr = EHField( + actx.from_numpy(eval_repr_at(places, target="patch_target")) + ) maxwell_residuals = [ actx.to_numpy( @@ -479,9 +476,10 @@ def eval_repr_at(tgt, source=None, target=None): ]) raise - fplot_repr = EHField(vector_from_device(actx.queue, fplot_repr)) - fplot_inc = EHField(vector_from_device(actx.queue, - eval_inc_field_at(places, target="plot_targets"))) + fplot_repr = EHField(actx.from_numpy(fplot_repr)) + fplot_inc = EHField( + actx.from_numpy(eval_inc_field_at(places, target="plot_targets")) + ) fplot.write_vtk_file( "potential-%s.vts" % resolution, diff --git a/test/test_scalar_int_eq.py b/test/test_scalar_int_eq.py index caa9e4ede..5910aab5e 100644 --- a/test/test_scalar_int_eq.py +++ b/test/test_scalar_int_eq.py @@ -38,13 +38,10 @@ from sumpy.kernel import BiharmonicKernel, HelmholtzKernel, LaplaceKernel from pytential import GeometryCollection, bind, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_stokes.py b/test/test_stokes.py index 47736e5bf..5c22656cb 100644 --- a/test/test_stokes.py +++ b/test/test_stokes.py @@ -31,20 +31,17 @@ import pytest from arraycontext import flatten, pytest_generate_tests_for_array_contexts -from meshmode import _acf # noqa: F401 # noqa: F401 # noqa: F401 +from meshmode import _acf # noqa: F401 from meshmode.array_context import PytestPyOpenCLArrayContextFactory from meshmode.discretization import Discretization from meshmode.discretization.poly_element import InterpolatoryQuadratureGroupFactory from pytools import obj_array from pytential import GeometryCollection, bind, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_symbolic.py b/test/test_symbolic.py index aa48008b2..3502083b6 100644 --- a/test/test_symbolic.py +++ b/test/test_symbolic.py @@ -40,13 +40,10 @@ ) from pytential import bind, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_target_specific_qbx.py b/test/test_target_specific_qbx.py index 0b2c4c5fd..c24e28403 100644 --- a/test/test_target_specific_qbx.py +++ b/test/test_target_specific_qbx.py @@ -34,13 +34,10 @@ from sumpy.kernel import HelmholtzKernel, LaplaceKernel from pytential import GeometryCollection, bind, sym - - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ]) diff --git a/test/test_tools.py b/test/test_tools.py index e320e3e5b..058d73fa0 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -34,12 +34,10 @@ from meshmode import _acf # noqa: F401 from meshmode.array_context import PytestPyOpenCLArrayContextFactory - -logger = logging.getLogger(__name__) - from pytential.utils import pytest_teardown_function as teardown_function # noqa: F401 +logger = logging.getLogger(__name__) pytest_generate_tests = pytest_generate_tests_for_array_contexts([ PytestPyOpenCLArrayContextFactory, ])