Skip to content

Commit ba9dc9a

Browse files
committed
Use the new method name (loopy_evaluate -> get_loopy_evaluator) and raise the iname split length / work-item cap from 32 to 256
1 parent cb3945a commit ba9dc9a

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

sumpy/e2p.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def default_name(self):
126126
def get_kernel(self, max_ntargets_in_one_box):
127127
ncoeffs = len(self.expansion)
128128
loopy_args = self.get_loopy_args()
129-
max_work_items = min(32, max(ncoeffs, max_ntargets_in_one_box))
129+
max_work_items = min(256, max(ncoeffs, max_ntargets_in_one_box))
130130

131131
loopy_knl = lp.make_kernel(
132132
[
@@ -211,8 +211,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
211211
inner_knl, optimizations = self.get_cached_loopy_knl_and_optimizations()
212212
knl = self.get_kernel(max_ntargets_in_one_box=max_ntargets_in_one_box)
213213
knl = lp.tag_inames(knl, {"itgt_box": "g.0"})
214-
knl = lp.split_iname(knl, "itgt_offset", 32, inner_tag="l.0")
215-
knl = lp.split_iname(knl, "icoeff", 32, inner_tag="l.0")
214+
knl = lp.split_iname(knl, "itgt_offset", 256, inner_tag="l.0")
215+
knl = lp.split_iname(knl, "icoeff", 256, inner_tag="l.0")
216216
knl = lp.add_inames_to_insn(knl, "dummy",
217217
"id:fetch_init* or id:fetch_center or id:kernel_scaling")
218218
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
@@ -276,7 +276,7 @@ def default_name(self):
276276
def get_kernel(self, max_ntargets_in_one_box):
277277
ncoeffs = len(self.expansion)
278278
loopy_args = self.get_loopy_args()
279-
max_work_items = min(32, max(ncoeffs, max_ntargets_in_one_box))
279+
max_work_items = min(256, max(ncoeffs, max_ntargets_in_one_box))
280280

281281
loopy_knl = lp.make_kernel(
282282
[
@@ -383,8 +383,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
383383
knl = lp.tag_inames(knl, {"itgt_box": "g.0", "dummy": "l.0"})
384384
knl = lp.unprivatize_temporaries_with_inames(knl,
385385
"itgt_offset", "result_temp")
386-
knl = lp.split_iname(knl, "itgt_offset", 32, inner_tag="l.0")
387-
knl = lp.split_iname(knl, "icoeff", 32, inner_tag="l.0")
386+
knl = lp.split_iname(knl, "itgt_offset", 256, inner_tag="l.0")
387+
knl = lp.split_iname(knl, "icoeff", 256, inner_tag="l.0")
388388
knl = lp.privatize_temporaries_with_inames(knl,
389389
"itgt_offset_outer", "result_temp")
390390
knl = lp.duplicate_inames(knl, "itgt_offset_outer", "id:init_result")

sumpy/expansion/local.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
VolumeTaylorExpansionMixin,
3333
LinearPDEConformingVolumeTaylorExpansion)
3434
from sumpy.tools import add_to_sac, mi_increment_axis
35+
from sumpy.kernel import Kernel
36+
37+
import loopy as lp
38+
39+
from typing import Sequence
3540

3641
import logging
3742
logger = logging.getLogger(__name__)
@@ -405,7 +410,7 @@ def loopy_translate_from(self, src_expansion):
405410
f"A direct loopy kernel for translation from "
406411
f"{src_expansion} to {self} is not implemented.")
407412

408-
def loopy_evaluate(self, kernels):
413+
def get_loopy_evaluator(self, kernels: Sequence[Kernel]) -> lp.TranslationUnit:
409414
from sumpy.expansion.loopy import (make_l2p_loopy_kernel_for_volume_taylor,
410415
make_e2p_loopy_kernel)
411416
try:

sumpy/expansion/loopy.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,13 @@ def get_idx(v):
515515
if c != sync_split:
516516
tags["e2p_iorder3"] = "l.0"
517517

518+
nsplit = min(256, ncoeffs)
519+
518520
optimizations += [
519521
lambda knl: lp.tag_inames(knl, tags),
520522
lambda knl: lp.set_temporary_address_space(knl, "e2p_coeffs_copy",
521523
lp.AddressSpace.LOCAL),
522-
lambda knl: lp.split_iname(knl, "e2p_icoeff", 32, inner_tag="l.0"),
524+
lambda knl: lp.split_iname(knl, "e2p_icoeff", nsplit, inner_tag="l.0"),
523525
]
524526

525527
target_args = gather_loopy_arguments((expansion,) + tuple(kernels))

0 commit comments

Comments
 (0)