From 75665003e17a49547f9ebdb835ba6d7c58aa3737 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sat, 7 Mar 2020 16:40:12 -0600 Subject: [PATCH 01/11] Use make_cse --- sumpy/assignment_collection.py | 12 ++++++++---- sumpy/codegen.py | 4 ---- sumpy/expansion/local.py | 2 +- sumpy/expansion/multipole.py | 6 +++--- sumpy/symbolic.py | 10 ++++++---- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/sumpy/assignment_collection.py b/sumpy/assignment_collection.py index dadf3fc95..f12c6d596 100644 --- a/sumpy/assignment_collection.py +++ b/sumpy/assignment_collection.py @@ -185,16 +185,20 @@ def run_global_cse(self, extra_exprs=[]): new_assignments, new_exprs = cse(assign_exprs + extra_exprs, symbols=self.symbol_generator) + xreplace_dict = {} + for name, value in new_assignments: + new_value = sym.make_cse(value.xreplace(xreplace_dict)) + xreplace_dict[name] = new_value + + for i in range(len(new_exprs)): + new_exprs[i] = new_exprs[i].xreplace(xreplace_dict) + new_assign_exprs = new_exprs[:len(assign_exprs)] new_extra_exprs = new_exprs[len(assign_exprs):] for name, new_expr in zip(assign_names, new_assign_exprs): self.assignments[name] = new_expr - for name, value in new_assignments: - assert isinstance(name, sym.Symbol) - self.add_assignment(name.name, value) - logger.info("common subexpression elimination: done after {dur:.2f} s" .format(dur=time.time() - start_time)) return new_extra_exprs diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 085f9bd45..9e52afa0d 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -67,10 +67,6 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): def not_supported(self, expr): if isinstance(expr, int): return expr - elif getattr(expr, "is_Function", False): - func_name = SympyToPymbolicMapperBase.function_name(self, expr) - return prim.Variable(func_name)( - *tuple(self.rec(arg) for arg in expr.args)) else: return SympyToPymbolicMapperBase.not_supported(self, expr) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index b4b435de5..78560f6f4 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -270,7 +270,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, rscale=src_rscale) replace_dict = dict((d, d/src_rscale) for d in dvec) taker = MiDerivativeTaker(expr, dvec) - rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) + rscale_ratio = sym.make_cse(tgt_rscale/src_rscale) result = [ (taker.diff(mi).xreplace(replace_dict) * rscale_ratio**sum(mi)) for mi in self.get_coefficient_identifiers()] diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index e3759d434..b643004b6 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -101,7 +101,7 @@ def coefficients_from_source(self, avec, bvec, rscale): for i, mi in enumerate(coeff_identifiers): result[i] /= (mi_factorial(mi) * rscale ** sum(mi)) else: - avec = [sym.UnevaluatedExpr(a * rscale**-1) for a in avec] + avec = [sym.make_cse(a * rscale**-1) for a in avec] result = [ mi_power(avec, mi) / mi_factorial(mi) @@ -170,7 +170,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, src_coeff_exprs = list(src_coeff_exprs) for i, mi in enumerate(src_expansion.get_coefficient_identifiers()): - src_coeff_exprs[i] *= sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(mi) + src_coeff_exprs[i] *= sym.make_cse(src_rscale/tgt_rscale)**sum(mi) result = [0] * len(self.get_full_coefficient_identifiers()) @@ -223,7 +223,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, k = src_mi[idim] assert n >= k contrib /= mi_factorial((n-k,)) - contrib *= sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k) + contrib *= sym.make_cse(dvec[idim]/tgt_rscale)**(n-k) result[i] += contrib diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 7a86958ae..0de2f80ab 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -80,13 +80,15 @@ def _find_symbolic_backend(): if USE_SYMENGINE: import symengine as sym - from pymbolic.interop.symengine import ( + from pymbolic.interop.symengine import ( # noqa: F401 PymbolicToSymEngineMapper as PymbolicToSympyMapper, - SymEngineToPymbolicMapper as SympyToPymbolicMapper) + SymEngineToPymbolicMapper as SympyToPymbolicMapper, + make_cse) else: import sympy as sym - from pymbolic.interop.sympy import ( - PymbolicToSympyMapper, SympyToPymbolicMapper) + from pymbolic.interop.sympy import ( # noqa: F401 + PymbolicToSympyMapper, SympyToPymbolicMapper, + make_cse) for _apifunc in SYMBOLIC_API: globals()[_apifunc] = getattr(sym, _apifunc) From 91c8370045b457e81a52db43530038fd4158b561 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sat, 7 Mar 2020 17:05:49 -0600 Subject: [PATCH 02/11] Need to wait till #125 lands --- sumpy/expansion/local.py | 2 +- sumpy/expansion/multipole.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 78560f6f4..b4b435de5 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -270,7 +270,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, rscale=src_rscale) replace_dict = dict((d, d/src_rscale) for d in dvec) taker = MiDerivativeTaker(expr, dvec) - rscale_ratio = sym.make_cse(tgt_rscale/src_rscale) + rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) result = [ (taker.diff(mi).xreplace(replace_dict) * rscale_ratio**sum(mi)) for mi in self.get_coefficient_identifiers()] diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index b643004b6..e3759d434 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -101,7 +101,7 @@ def coefficients_from_source(self, avec, bvec, rscale): for i, mi in enumerate(coeff_identifiers): result[i] /= (mi_factorial(mi) * rscale ** sum(mi)) else: - avec = [sym.make_cse(a * rscale**-1) for a in avec] + avec = [sym.UnevaluatedExpr(a * rscale**-1) for a in avec] result = [ mi_power(avec, mi) / mi_factorial(mi) @@ -170,7 +170,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, src_coeff_exprs = list(src_coeff_exprs) for i, mi in enumerate(src_expansion.get_coefficient_identifiers()): - src_coeff_exprs[i] *= sym.make_cse(src_rscale/tgt_rscale)**sum(mi) + src_coeff_exprs[i] *= sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(mi) result = [0] * len(self.get_full_coefficient_identifiers()) @@ -223,7 +223,7 @@ def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, k = src_mi[idim] assert n >= k contrib /= mi_factorial((n-k,)) - contrib *= sym.make_cse(dvec[idim]/tgt_rscale)**(n-k) + contrib *= sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k) result[i] += contrib From 61c2db02b8ab0dee905c0dec800fcba14d576b38 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sat, 7 Mar 2020 17:48:58 -0600 Subject: [PATCH 03/11] Use tiker.net --- .test-conda-env-py3.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 2264e9260..202edac89 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -19,5 +19,5 @@ dependencies: - pip: - git+https://github.com/inducer/pytools - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic + - git+https://gitlab.tiker.net/inducer/pymbolic - git+https://github.com/inducer/loopy diff --git a/requirements.txt b/requirements.txt index efe91f948..f522da7df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy sympy==1.1.1 git+https://github.com/inducer/pytools -git+https://github.com/inducer/pymbolic +git+https://gitlab.tiker.net/inducer/pymbolic git+https://github.com/inducer/islpy git+https://github.com/inducer/pyopencl git+https://gitlab.tiker.net/inducer/boxtree From c7f0fc4225251a0a11482f4c04ebc3b9b22db866 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 13 Mar 2020 10:18:40 -0500 Subject: [PATCH 04/11] Add symbolic sum --- benchmarks/bench_translations.py | 2 +- sumpy/codegen.py | 49 +++++++++++++++++++++++++++++--- sumpy/kernel.py | 4 +-- sumpy/symbolic.py | 45 ++++++++++++++++++++++++++++- sumpy/tools.py | 36 +++++++++++++++++++++++ test/test_codegen.py | 46 ++++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+), 8 deletions(-) diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 8d6cfdd88..58388ceba 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -75,7 +75,7 @@ def track_m2l_op_count(self, param): for i, expr in enumerate(result): sac.assign_unique("coeff%d" % i, expr) sac.run_global_cse() - insns = to_loopy_insns(six.iteritems(sac.assignments)) + insns, _ = to_loopy_insns(six.iteritems(sac.assignments)) counter = pymbolic.mapper.flop_counter.CSEAwareFlopCounter() return sum([counter.rec(insn.expression)+1 for insn in insns]) diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 9e52afa0d..34c39dc61 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -32,7 +32,7 @@ import six import re -from pymbolic.mapper import IdentityMapper, WalkMapper, CSECachingMapperMixin +from pymbolic.mapper import CSECachingMapperMixin import pymbolic.primitives as prim from loopy.types import NumpyType @@ -40,6 +40,7 @@ from pytools import memoize_method from sumpy.symbolic import (SympyToPymbolicMapper as SympyToPymbolicMapperBase) +from sumpy.symbolic import Series, IdentityMapper, WalkMapper, SubstitutionMapper import logging logger = logging.getLogger(__name__) @@ -64,6 +65,18 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): + def map_Sum(self, expr): + if len(expr.limits) != 1: + raise NotImplementedError + + name, low, high = expr.limits[0] + + # TODO: name, low, high need recursion? + low = int(low) + high = int(high) + + return Series(self.rec(expr.function), self.rec(name), low, high) + def not_supported(self, expr): if isinstance(expr, int): return expr @@ -129,7 +142,7 @@ def make_one_step_subst(assignments): for name in toposort: value = assignments[name] - value = substitute(value, result) + value = substitute(value, result, mapper=SubstitutionMapper) used_name_to_var.update( (used_name, prim.Variable(used_name)) for used_name in get_dependencies(value) @@ -175,7 +188,7 @@ def kill_trivial_assignments(assignments, retain_names=set()): result = [] from pymbolic import substitute for name, expr in approved_assignments: - r = substitute(expr, unsubst_rej) + r = substitute(expr, unsubst_rej, mapper=SubstitutionMapper) result.append((name, r)) logger.info( @@ -673,6 +686,32 @@ def map_variable(self, expr): map_common_subexpression_uncached = IdentityMapper.map_common_subexpression +# {{{ convert pymbolic "Series" class to loopy reduction + +class SeriesRewritter(CSECachingMapperMixin, IdentityMapper): + + def __init__(self): + self.additional_loop_domain = [] + + def map_series(self, expr): + function = self.rec(expr.function) + + # TODO: recursion on low, high and name? + low = expr.low + high = expr.high + name = str(expr.name) + + self.additional_loop_domain.append( + (name, low, high) + ) + + return lp.Reduction("sum", name, function) + + map_common_subexpression_uncached = IdentityMapper.map_common_subexpression + +# }}} + + def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], complex_dtype=None, retain_names=set()): logger.info("loopy instruction generation: start") @@ -699,6 +738,7 @@ def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], # do the rest of the conversion bessel_sub = BesselSubstitutor(BesselGetter(btog.bessel_j_arg_to_top_order)) + sr = SeriesRewritter() vcr = VectorComponentRewriter(vector_names) pwr = PowerRewriter() ssg = SumSignGrouper() @@ -708,6 +748,7 @@ def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], def convert_expr(name, expr): logger.debug("generate expression for: %s" % name) + expr = sr(expr) expr = bdr(expr) expr = bessel_sub(expr) expr = vcr(expr) @@ -731,6 +772,6 @@ def convert_expr(name, expr): for name, expr in assignments] logger.info("loopy instruction generation: done") - return result + return result, sr.additional_loop_domain # vim: fdm=marker diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 6383d8d57..4d0249f8e 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -26,8 +26,8 @@ import loopy as lp import numpy as np -from pymbolic.mapper import IdentityMapper, CSECachingMapperMixin -from sumpy.symbolic import pymbolic_real_norm_2 +from pymbolic.mapper import CSECachingMapperMixin +from sumpy.symbolic import pymbolic_real_norm_2, IdentityMapper from pymbolic.primitives import make_sym_vector from pymbolic import var diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 0de2f80ab..fb05aa2fb 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -27,7 +27,9 @@ from six.moves import range import numpy as np -from pymbolic.mapper import IdentityMapper as IdentityMapperBase +from loopy.symbolic import IdentityMapper as IdentityMapperBase +from loopy.symbolic import WalkMapper as WalkMapperBase +from pymbolic.mapper.substitutor import SubstitutionMapper as SubstitutionMapperBase import pymbolic.primitives as prim import logging @@ -248,4 +250,45 @@ def map_subscript(self, expr): else: self.raise_conversion_error(expr) + +# {{{ Series + +class Series(prim.Expression): + def __init__(self, function, name, low, high): + self.function = function + self.name = name + self.low = low + self.high = high + + mapper_method = "map_series" + + def __getinitargs__(self): + return self.function, self.name, self.low, self.high + + +class IdentityMapper(IdentityMapperBase): + def map_series(self, expr): + return Series( + self.rec(expr.function), + expr.name, + expr.low, + expr.high + ) + + +class WalkMapper(WalkMapperBase): + def map_series(self, expr, *args, **kwargs): + if not self.visit(expr, *args, **kwargs): + return + + self.rec(expr.function, *args, **kwargs) + + self.post_visit(expr, *args, **kwargs) + + +class SubstitutionMapper(SubstitutionMapperBase, IdentityMapper): + pass + +# }}} + # vim: fdm=marker diff --git a/sumpy/tools.py b/sumpy/tools.py index 4d1098429..d5fdba8cf 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -717,4 +717,40 @@ def my_syntactic_subs(expr, subst_dict): return expr +def get_loopy_domain(loop_domains): + """Helper function to get the loopy domain to pass to `loopy.make_kernel`. + + :arg loop_domains: a list of domains. Each domain is a tuple. If the tuple has 3 + elements (name, low, high), it represents iname "name" has range [low, high). + If the tuple has 2 elements (name, duplicate_name), it represents iname + "name" has the same range as "duplicate_name", where the domain of + "duplicate_name" must be represented by 3-element tuples. + """ + domain_to_range = {} + + for domain in loop_domains: + if len(domain) == 3: + name, low, high = domain + assert name not in domain_to_range + domain_to_range[name] = (low, high) + + for idx, domain in enumerate(loop_domains): + if len(domain) == 2: + name, duplicate_name = domain + low, high = domain_to_range[duplicate_name] + loop_domains[idx] = (name, low, high) + + domain_names = "" + conditions = "" + + for idx, (name, low, high) in enumerate(loop_domains): + domain_names += f"{name}" + conditions += f"{low} <= {name} < {high}" + + if idx + 1 != len(loop_domains): + domain_names += ", " + conditions += " and " + + return "{[" + domain_names + "]:" + conditions + "}" + # vim: fdm=marker diff --git a/test/test_codegen.py b/test/test_codegen.py index 7e3c25e0e..d16dd578b 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -25,6 +25,10 @@ import sys +import pyopencl as cl +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + import logging logger = logging.getLogger(__name__) @@ -105,6 +109,48 @@ def test_line_taylor_coeff_growth(): assert np.polyfit(np.log(indices), np.log(counts), deg=1)[0] < max_order +def test_sym_sum(ctx_getter): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + import six + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + + from sympy.abc import j + from sympy import Sum + sac.add_assignment("tmp", Sum(j, (j, 1, 10))) + + from sumpy.codegen import to_loopy_insns + insn, additional_loop_domain = to_loopy_insns( + six.iteritems(sac.assignments), + retain_names=["tmp"] + ) + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [("i", 0, 5)] + + additional_loop_domain + ) + + import loopy as lp + knl = lp.make_kernel( + domain, + insn + [lp.Assignment("a[i]", "tmp")], + lang_version=(2018, 2) + ) + + _, result = knl(queue) + result = result[0].get() + + import numpy as np + ref_sol = np.ones(5, dtype=np.int32) + ref_sol = ref_sol * 45 + + assert result.shape == (5,) + assert np.allclose(result, ref_sol) + + # You can test individual routines by typing # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' From 50ae8ef8691322143726165d1247e22d3d9ced8f Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 13 Mar 2020 11:34:45 -0500 Subject: [PATCH 05/11] Update operations for new codegen API --- sumpy/e2e.py | 29 ++++++++++++++++----- sumpy/e2p.py | 20 ++++++++++----- sumpy/p2e.py | 20 +++++++++++---- sumpy/p2p.py | 70 +++++++++++++++++++++++++++++++++++--------------- sumpy/qbx.py | 63 ++++++++++++++++++++++++++++++--------------- sumpy/tools.py | 5 +++- 6 files changed, 148 insertions(+), 59 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 12e80b5f5..6c8653abb 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -160,13 +160,18 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -190,7 +195,7 @@ def get_kernel(self): {{dep=read_src_ibox}} """.format(coeffidx=i) for i in range(ncoeff_src)] + [ - ] + self.get_translation_loopy_insns() + [""" + ] + insns + [""" end """] + [""" @@ -276,11 +281,16 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + loopy_insns = [ insn.copy( predicates=insn.predicates | frozenset(["is_src_box_valid"]), id=lp.UniqueName("compute_coeff")) - for insn in self.get_translation_loopy_insns()] + for insn in insns] from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( @@ -288,7 +298,7 @@ def get_kernel(self): "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -395,12 +405,17 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -419,7 +434,7 @@ def get_kernel(self): {{id_prefix=read_expn,dep=read_src_ibox}} """.format(i=i) for i in range(ncoeffs)] + [ - ] + self.get_translation_loopy_insns() + [""" + ] + insns + [""" tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \ tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + coeff{i} \ diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 7b0072ad5..ffe790538 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -102,7 +102,7 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns( + loopy_insns, additional_domain = to_loopy_insns( six.iteritems(sac.assignments), vector_names=set(["b"]), pymbolic_expr_maps=[self.expansion.get_code_transformer()], @@ -110,7 +110,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_kernel_scaling_assignment(self): from sumpy.symbolic import SympyToPymbolicMapper @@ -135,13 +135,17 @@ class E2PFromSingleBox(E2PBase): def get_kernel(self): ncoeffs = len(self.expansion) - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box src_ibox = source_boxes[isrc_box] @@ -133,7 +138,7 @@ def get_kernel(self): <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim} <> strength = strengths[isrc] - """] + self.get_loopy_instructions() + [""" + """] + insns + [""" end """] + [""" tgt_expansions[src_ibox-tgt_base_ibox, {coeffidx}] = \ @@ -206,6 +211,11 @@ class P2EFromCSR(P2EBase): def get_kernel(self): ncoeffs = len(self.expansion) + insns, additional_domain = self.get_loopy_instructions() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_source_arguments arguments = ( [ @@ -231,7 +241,7 @@ def get_kernel(self): "{[isrc_box]: isrc_box_start<=isrc_box tgt_ibox = target_boxes[itgt_box] @@ -251,7 +261,7 @@ def get_kernel(self): {dup=idim} <> strength = strengths[isrc] - """] + self.get_loopy_instructions() + [""" + """] + insns + [""" end end """] + [""" diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e3b457dd5..e6f5e1e83 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -99,7 +99,8 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns(six.iteritems(sac.assignments), + loopy_insns, additional_domain = to_loopy_insns( + six.iteritems(sac.assignments), vector_names=set(["d"]), pymbolic_expr_maps=[ knl.get_code_transformer() for knl in self.kernels], @@ -107,7 +108,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_strength_or_not(self, isrc, kernel_idx): return var("strength").index((self.strength_usage[kernel_idx], isrc)) @@ -168,7 +169,18 @@ class P2P(P2PBase): default_name = "p2p_apply" def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("itgt", 0, "ntargets"), + ("isrc", 0, "nsources"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -179,12 +191,8 @@ def get_kernel(self): shape="nresults, ntargets", dim_tags="sep,C") ]) - loopy_knl = lp.make_kernel([""" - {[itgt, isrc, idim]: \ - 0 <= itgt < ntargets and \ - 0 <= isrc < nsources and \ - 0 <= idim < dim} - """], + loopy_knl = lp.make_kernel( + domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -238,7 +246,18 @@ def get_strength_or_not(self, isrc, kernel_idx): return 1 def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("itgt", 0, "ntargets"), + ("isrc", 0, "nsources"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -246,12 +265,8 @@ def get_kernel(self): shape="ntargets,nsources") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([""" - {[itgt, isrc, idim]: \ - 0 <= itgt < ntargets and \ - 0 <= isrc < nsources and \ - 0 <= idim < dim} - """], + loopy_knl = lp.make_kernel( + domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -304,7 +319,17 @@ def get_strength_or_not(self, isrc, kernel_idx): return 1 def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("imat", 0, "nresult"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -317,7 +342,7 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel( - "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}", + domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in @@ -412,7 +437,12 @@ class P2PFromCSR(P2PBase): default_name = "p2p_from_csr" def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -443,7 +473,7 @@ def get_kernel(self): itgt_start <= itgt < itgt_end and \ isrc_start <= isrc < isrc_end and \ 0 <= idim < dim}", - ], + ] + additional_domain, self.get_kernel_scaling_assignments() + [""" for itgt_box diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9708764c0..1b15f9fe9 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -122,7 +122,7 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns( + loopy_insns, additional_domain = to_loopy_insns( six.iteritems(sac.assignments), vector_names=set(["a", "b"]), pymbolic_expr_maps=[ @@ -131,7 +131,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_strength_or_not(self, isrc, kernel_idx): return var("strength_%d" % self.strength_usage[kernel_idx]).index(isrc) @@ -198,7 +198,18 @@ class LayerPotential(LayerPotentialBase): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("itgt", 0, "ntargets"), + ("isrc", 0, "nsources"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -209,12 +220,8 @@ def get_kernel(self): None, shape="ntargets", order="C") for i in range(len(self.kernels))]) - loopy_knl = lp.make_kernel([""" - {[itgt, isrc, idim]: \ - 0 <= itgt < ntargets and \ - 0 <= isrc < nsources and \ - 0 <= idim < dim} - """], + loopy_knl = lp.make_kernel( + domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -272,7 +279,18 @@ def get_strength_or_not(self, isrc, kernel_idx): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("itgt", 0, "ntargets"), + ("isrc", 0, "nsources"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -280,12 +298,8 @@ def get_kernel(self): dtype, shape="ntargets, nsources", order="C") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([""" - {[itgt, isrc, idim]: \ - 0 <= itgt < ntargets and \ - 0 <= isrc < nsources and \ - 0 <= idim < dim} - """], + loopy_knl = lp.make_kernel( + domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -336,7 +350,17 @@ def get_strength_or_not(self, isrc, kernel_idx): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + domain = get_loopy_domain( + [ + ("imat", 0, "nresult"), + ("idim", 0, "dim") + ] + additional_domain + ) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -348,9 +372,8 @@ def get_kernel(self): + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([ - "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" - ], + loopy_knl = lp.make_kernel( + domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in diff --git a/sumpy/tools.py b/sumpy/tools.py index d5fdba8cf..fbc7eed3b 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -751,6 +751,9 @@ def get_loopy_domain(loop_domains): domain_names += ", " conditions += " and " - return "{[" + domain_names + "]:" + conditions + "}" + if domain_names == "" and conditions == "": + return [] + else: + return ["{[" + domain_names + "]:" + conditions + "}"] # vim: fdm=marker From 069504c5e4dcc63a9dab929465e35fe8d074eec3 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 13 Mar 2020 15:30:59 -0500 Subject: [PATCH 06/11] Temporarily switch to customized pymbolic branch for CI --- .test-conda-env-py3.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 202edac89..95d8effdf 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -19,5 +19,5 @@ dependencies: - pip: - git+https://github.com/inducer/pytools - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://gitlab.tiker.net/inducer/pymbolic + - git+https://github.com/gaohao95/pymbolic@substitute-mapper - git+https://github.com/inducer/loopy diff --git a/requirements.txt b/requirements.txt index f522da7df..b41e5f4e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy sympy==1.1.1 git+https://github.com/inducer/pytools -git+https://gitlab.tiker.net/inducer/pymbolic +git+https://github.com/gaohao95/pymbolic@substitute-mapper git+https://github.com/inducer/islpy git+https://github.com/inducer/pyopencl git+https://gitlab.tiker.net/inducer/boxtree From d0beb841dc8193d568ade8aaa503431dc2511ce1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 14 Mar 2020 15:10:48 -0500 Subject: [PATCH 07/11] Use SubstitutionMapper directly --- .test-conda-env-py3.yml | 2 +- requirements.txt | 2 +- sumpy/codegen.py | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 95d8effdf..202edac89 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -19,5 +19,5 @@ dependencies: - pip: - git+https://github.com/inducer/pytools - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/gaohao95/pymbolic@substitute-mapper + - git+https://gitlab.tiker.net/inducer/pymbolic - git+https://github.com/inducer/loopy diff --git a/requirements.txt b/requirements.txt index b41e5f4e9..f522da7df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy sympy==1.1.1 git+https://github.com/inducer/pytools -git+https://github.com/gaohao95/pymbolic@substitute-mapper +git+https://gitlab.tiker.net/inducer/pymbolic git+https://github.com/inducer/islpy git+https://github.com/inducer/pyopencl git+https://gitlab.tiker.net/inducer/boxtree diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 34c39dc61..884810bd2 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -41,6 +41,7 @@ from sumpy.symbolic import (SympyToPymbolicMapper as SympyToPymbolicMapperBase) from sumpy.symbolic import Series, IdentityMapper, WalkMapper, SubstitutionMapper +from pymbolic.mapper.substitutor import make_subst_func import logging logger = logging.getLogger(__name__) @@ -132,8 +133,6 @@ def make_one_step_subst(assignments): # {{{ make substitution - from pymbolic import substitute - result = {} used_name_to_var = {} from pymbolic import evaluate @@ -142,7 +141,7 @@ def make_one_step_subst(assignments): for name in toposort: value = assignments[name] - value = substitute(value, result, mapper=SubstitutionMapper) + value = SubstitutionMapper(make_subst_func(result))(value) used_name_to_var.update( (used_name, prim.Variable(used_name)) for used_name in get_dependencies(value) @@ -186,9 +185,8 @@ def kill_trivial_assignments(assignments, retain_names=set()): unsubst_rej = make_one_step_subst(rejected_assignments) result = [] - from pymbolic import substitute for name, expr in approved_assignments: - r = substitute(expr, unsubst_rej, mapper=SubstitutionMapper) + r = SubstitutionMapper(make_subst_func(unsubst_rej))(expr) result.append((name, r)) logger.info( From 3f0ef2163a0724c3147d0fe068153582df94737f Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 17 Mar 2020 02:31:07 +0100 Subject: [PATCH 08/11] Update symengine to 0.6.1 --- .test-conda-env-py3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 202edac89..9f8c38d0c 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -12,7 +12,7 @@ dependencies: - islpy - pyopencl - python=3 -- python-symengine=0.6.0 +- python-symengine=0.6.1 - pyfmmlib - pip From 203060dcf4f2b9096ec18ff67d4ebdbc5ea3fae1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 18 Mar 2020 23:12:50 -0500 Subject: [PATCH 09/11] Nested sum and symbolic bound for symbolic sum --- sumpy/codegen.py | 32 +++++++++++++------------- sumpy/symbolic.py | 23 ++++++++++--------- sumpy/tools.py | 20 ++++++----------- test/test_codegen.py | 53 ++++++++++++++++++++++++++++++++++---------- 4 files changed, 75 insertions(+), 53 deletions(-) diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 884810bd2..02ab334f9 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -67,16 +67,11 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): def map_Sum(self, expr): - if len(expr.limits) != 1: - raise NotImplementedError + pymbolic_limits = [] + for name, low, high in expr.limits: + pymbolic_limits.append((name, self.rec(low), self.rec(high))) - name, low, high = expr.limits[0] - - # TODO: name, low, high need recursion? - low = int(low) - high = int(high) - - return Series(self.rec(expr.function), self.rec(name), low, high) + return Series(self.rec(expr.function), pymbolic_limits) def not_supported(self, expr): if isinstance(expr, int): @@ -693,17 +688,20 @@ def __init__(self): def map_series(self, expr): function = self.rec(expr.function) + inames = [] - # TODO: recursion on low, high and name? - low = expr.low - high = expr.high - name = str(expr.name) + for name, low, high in expr.limits: + low = self.rec(low) + high = self.rec(high) + name = str(name) - self.additional_loop_domain.append( - (name, low, high) - ) + inames.append(name) + self.additional_loop_domain.append( + # +1 is used for converting the closed bound in sympy to open bound + (name, low + 1, high + 1) + ) - return lp.Reduction("sum", name, function) + return lp.Reduction("sum", tuple(inames), function) map_common_subexpression_uncached = IdentityMapper.map_common_subexpression diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index fb05aa2fb..f4f185fa8 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -254,26 +254,23 @@ def map_subscript(self, expr): # {{{ Series class Series(prim.Expression): - def __init__(self, function, name, low, high): + def __init__(self, function, limits): self.function = function - self.name = name - self.low = low - self.high = high + self.limits = limits mapper_method = "map_series" def __getinitargs__(self): - return self.function, self.name, self.low, self.high + return self.function, self.limits class IdentityMapper(IdentityMapperBase): def map_series(self, expr): - return Series( - self.rec(expr.function), - expr.name, - expr.low, - expr.high - ) + new_limits = [] + for name, low, high in expr.limits: + new_limits.append((name, self.rec(low), self.rec(high))) + + return Series(self.rec(expr.function), new_limits) class WalkMapper(WalkMapperBase): @@ -281,6 +278,10 @@ def map_series(self, expr, *args, **kwargs): if not self.visit(expr, *args, **kwargs): return + for name, low, high in expr.limits: + self.rec(low, *args, **kwargs) + self.rec(high, *args, **kwargs) + self.rec(expr.function, *args, **kwargs) self.post_visit(expr, *args, **kwargs) diff --git a/sumpy/tools.py b/sumpy/tools.py index fbc7eed3b..725a8bf6b 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -740,20 +740,14 @@ def get_loopy_domain(loop_domains): low, high = domain_to_range[duplicate_name] loop_domains[idx] = (name, low, high) - domain_names = "" - conditions = "" - + domains = [] for idx, (name, low, high) in enumerate(loop_domains): - domain_names += f"{name}" - conditions += f"{low} <= {name} < {high}" - - if idx + 1 != len(loop_domains): - domain_names += ", " - conditions += " and " + domains.append( + "{{ [{name}]: {low} <= {name} < {high} }}".format( + name=name, low=low, high=high + ) + ) - if domain_names == "" and conditions == "": - return [] - else: - return ["{[" + domain_names + "]:" + conditions + "}"] + return domains # vim: fdm=marker diff --git a/test/test_codegen.py b/test/test_codegen.py index d16dd578b..4b75f1fa6 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -117,9 +117,9 @@ def test_sym_sum(ctx_getter): from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() - from sympy.abc import j + from sympy.abc import i, j from sympy import Sum - sac.add_assignment("tmp", Sum(j, (j, 1, 10))) + sac.add_assignment("tmp", Sum(j, (j, 0, i))) from sumpy.codegen import to_loopy_insns insn, additional_loop_domain = to_loopy_insns( @@ -128,15 +128,13 @@ def test_sym_sum(ctx_getter): ) from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [("i", 0, 5)] - + additional_loop_domain - ) - import loopy as lp knl = lp.make_kernel( - domain, - insn + [lp.Assignment("a[i]", "tmp")], + ["{[i]: 0<=i<=5}"] + get_loopy_domain(additional_loop_domain), + ["for i"] + + insn + + [lp.Assignment("a[i]", "tmp")] + + ["end"], lang_version=(2018, 2) ) @@ -144,13 +142,44 @@ def test_sym_sum(ctx_getter): result = result[0].get() import numpy as np - ref_sol = np.ones(5, dtype=np.int32) - ref_sol = ref_sol * 45 + ref_sol = np.array([0, 1, 3, 6, 10, 15], dtype=np.int32) - assert result.shape == (5,) + assert result.shape == (6,) assert np.allclose(result, ref_sol) +def test_sym_nested_sum(ctx_getter): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + import six + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + + from sympy.abc import i, j + from sympy import Sum + sac.add_assignment("tmp", Sum(i*j, (i, 0, 5), (j, 0, 3))) + + from sumpy.codegen import to_loopy_insns + insn, additional_loop_domain = to_loopy_insns( + six.iteritems(sac.assignments), + retain_names=["tmp"] + ) + + from sumpy.tools import get_loopy_domain + import loopy as lp + knl = lp.make_kernel( + get_loopy_domain(additional_loop_domain), + insn + [lp.Assignment("a", "tmp")], + lang_version=(2018, 2) + ) + + _, result = knl(queue) + result = result[0].get() + + assert result == 90 + + # You can test individual routines by typing # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' From bf521258cd64c1d0181b80c35e71c8c1285ab611 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 19 Mar 2020 11:07:09 -0500 Subject: [PATCH 10/11] Address test failure --- sumpy/codegen.py | 2 +- sumpy/p2p.py | 42 +++++++++++++++++------------------------- sumpy/qbx.py | 44 ++++++++++++++++++-------------------------- test/test_codegen.py | 16 ++++++++++++---- 4 files changed, 48 insertions(+), 56 deletions(-) diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 02ab334f9..156a4e78b 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -66,7 +66,7 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): - def map_Sum(self, expr): + def map_Sum(self, expr): # noqa pymbolic_limits = [] for name, low, high in expr.limits: pymbolic_limits.append((name, self.rec(low), self.rec(high))) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e6f5e1e83..e54bb7393 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -173,13 +173,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("itgt", 0, "ntargets"), - ("isrc", 0, "nsources"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -191,8 +185,12 @@ def get_kernel(self): shape="nresults, ntargets", dim_tags="sep,C") ]) - loopy_knl = lp.make_kernel( - domain, + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -250,13 +248,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("itgt", 0, "ntargets"), - ("isrc", 0, "nsources"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -265,8 +257,12 @@ def get_kernel(self): shape="ntargets,nsources") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel( - domain, + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -323,12 +319,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("imat", 0, "nresult"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -342,7 +333,8 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel( - domain, + ["{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}"] + + additional_domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 1b15f9fe9..cc3e909cc 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -202,13 +202,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("itgt", 0, "ntargets"), - ("isrc", 0, "nsources"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -220,8 +214,12 @@ def get_kernel(self): None, shape="ntargets", order="C") for i in range(len(self.kernels))]) - loopy_knl = lp.make_kernel( - domain, + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -283,13 +281,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("itgt", 0, "ntargets"), - ("isrc", 0, "nsources"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -298,8 +290,12 @@ def get_kernel(self): dtype, shape="ntargets, nsources", order="C") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel( - domain, + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -354,12 +350,7 @@ def get_kernel(self): self.get_loopy_insns_and_result_names() from sumpy.tools import get_loopy_domain - domain = get_loopy_domain( - [ - ("imat", 0, "nresult"), - ("idim", 0, "dim") - ] + additional_domain - ) + additional_domain = get_loopy_domain(additional_domain) kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( @@ -372,8 +363,9 @@ def get_kernel(self): + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel( - domain, + loopy_knl = lp.make_kernel([ + "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" + ] + additional_domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in diff --git a/test/test_codegen.py b/test/test_codegen.py index 4b75f1fa6..79130a2fc 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -28,6 +28,7 @@ import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +import pytest import logging logger = logging.getLogger(__name__) @@ -110,10 +111,13 @@ def test_line_taylor_coeff_growth(): def test_sym_sum(ctx_getter): + from sumpy.symbolic import USE_SYMENGINE + if USE_SYMENGINE: + pytest.xfail("Symengine does not support symbolic sum yet") + ctx = ctx_getter() queue = cl.CommandQueue(ctx) - import six from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() @@ -121,6 +125,7 @@ def test_sym_sum(ctx_getter): from sympy import Sum sac.add_assignment("tmp", Sum(j, (j, 0, i))) + import six from sumpy.codegen import to_loopy_insns insn, additional_loop_domain = to_loopy_insns( six.iteritems(sac.assignments), @@ -149,10 +154,13 @@ def test_sym_sum(ctx_getter): def test_sym_nested_sum(ctx_getter): + from sumpy.symbolic import USE_SYMENGINE + if USE_SYMENGINE: + pytest.xfail("Symengine does not support symbolic sum yet") + ctx = ctx_getter() queue = cl.CommandQueue(ctx) - import six from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() @@ -160,6 +168,7 @@ def test_sym_nested_sum(ctx_getter): from sympy import Sum sac.add_assignment("tmp", Sum(i*j, (i, 0, 5), (j, 0, 3))) + import six from sumpy.codegen import to_loopy_insns insn, additional_loop_domain = to_loopy_insns( six.iteritems(sac.assignments), @@ -187,7 +196,6 @@ def test_sym_nested_sum(ctx_getter): if len(sys.argv) > 1: exec(sys.argv[1]) else: - from pytest import main - main([__file__]) + pytest.main([__file__]) # vim: fdm=marker From 3ad1eca22d80ca650fa525b50c7072507ee606af Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 19 Mar 2020 11:42:17 -0500 Subject: [PATCH 11/11] Minor tweak --- sumpy/codegen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 156a4e78b..1d72ed868 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -69,7 +69,7 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): def map_Sum(self, expr): # noqa pymbolic_limits = [] for name, low, high in expr.limits: - pymbolic_limits.append((name, self.rec(low), self.rec(high))) + pymbolic_limits.append((self.rec(name), self.rec(low), self.rec(high))) return Series(self.rec(expr.function), pymbolic_limits)