|
26 | 26 | import loopy as lp |
27 | 27 | import numpy as np |
28 | 28 | from sumpy.expansion import ExpansionBase |
29 | | -from sumpy.kernel import Kernel |
| 29 | +from sumpy.kernel import Kernel, LaplaceKernel |
30 | 30 | import sumpy.symbolic as sym |
31 | 31 | from sumpy.assignment_collection import SymbolicAssignmentCollection |
32 | 32 | from sumpy.tools import gather_loopy_arguments, gather_loopy_source_arguments |
@@ -134,6 +134,193 @@ def make_e2p_loopy_kernel(expansion: ExpansionBase, kernels: Sequence[Kernel]) \ |
134 | 134 | return loopy_knl, optimizations |
135 | 135 |
|
136 | 136 |
|
| 137 | +def make_m2p_loopy_kernel_for_volume_taylor( |
| 138 | + expansion: ExpansionBase, kernels: Sequence[Kernel]) -> lp.TranslationUnit: |
| 139 | + |
| 140 | + #raise NotImplementedError() |
| 141 | + if len(kernels) > 1: |
| 142 | + raise NotImplementedError() |
| 143 | + |
| 144 | + kernel = kernels[0] |
| 145 | + if not isinstance(kernel, LaplaceKernel): |
| 146 | + raise NotImplementedError() |
| 147 | + |
| 148 | + dim = expansion.dim |
| 149 | + order = expansion.order |
| 150 | + |
| 151 | + if dim != 3 or order < 2: |
| 152 | + raise NotImplementedError() |
| 153 | + |
| 154 | + b = pymbolic.var("b") |
| 155 | + ncoeffs = len(expansion.get_coefficient_identifiers()) |
| 156 | + |
| 157 | + temp = pymbolic.var("temp") |
| 158 | + inv_r = pymbolic.var("inv_r") |
| 159 | + inv_r2 = pymbolic.var("inv_r2") |
| 160 | + |
| 161 | + domains = [ |
| 162 | + "{[idim]: 0<=idim<dim}", |
| 163 | + ] |
| 164 | + insns = [ |
| 165 | + lp.Assignment( |
| 166 | + assignee="b[idim]", |
| 167 | + expression="(target[idim]-center[idim])/rscale", |
| 168 | + temp_var_type=lp.Optional(None), |
| 169 | + ), |
| 170 | + lp.Assignment( |
| 171 | + assignee=inv_r, |
| 172 | + expression="rsqrt(b[0]*b[0] + b[1]*b[1] + b[2]* b[2])", |
| 173 | + temp_var_type=lp.Optional(None), |
| 174 | + ), |
| 175 | + lp.Assignment( |
| 176 | + id="inv_r2", |
| 177 | + assignee=inv_r2, |
| 178 | + expression=inv_r*inv_r, |
| 179 | + temp_var_type=lp.Optional(None), |
| 180 | + ), |
| 181 | + ] |
| 182 | + |
| 183 | + init_exprs = {} |
| 184 | + init_exprs[(0, 0, 0)] = inv_r |
| 185 | + |
| 186 | + # Order 1 expressions |
| 187 | + for i in range(dim): |
| 188 | + mi = [0]*dim |
| 189 | + mi[i] = 1 |
| 190 | + init_exprs[tuple(mi)] = -inv_r * b[i] |
| 191 | + |
| 192 | + # Order 2 expressions |
| 193 | + for i in range(dim): |
| 194 | + for j in range(dim): |
| 195 | + mi = [0]*dim |
| 196 | + mi[i] = 1 |
| 197 | + mi[j] = 1 |
| 198 | + init_exprs[tuple(mi)] = inv_r*inv_r2*inv_r2*3*b[i]*b[j] |
| 199 | + |
| 200 | + # Order 3 expressions |
| 201 | + init_exprs[(1, 1, 1)] = -15 * inv_r * inv_r2 * inv_r2 * inv_r2 \ |
| 202 | + * b[0] * b[1] * b[2] |
| 203 | + |
| 204 | + depends_on = frozenset(["inv_r2"]) |
| 205 | + for i in range(2): |
| 206 | + for j in range(2): |
| 207 | + for k in range(2): |
| 208 | + insn = lp.Assignment( |
| 209 | + id=f"init_{i}_{j}_{k}", |
| 210 | + assignee=temp[i, j, k], |
| 211 | + expression=init_exprs[(i, j, k)], |
| 212 | + depends_on=depends_on, |
| 213 | + ) |
| 214 | + depends_on = frozenset([f"init_{i}_{j}_{k}"]) |
| 215 | + insns.append(insn) |
| 216 | + |
| 217 | + wrangler = expansion.expansion_terms_wrangler |
| 218 | + result = pymbolic.var("result") |
| 219 | + coeffs = pymbolic.var("coeffs") |
| 220 | + |
| 221 | + # For x1 == 0, 1 |
| 222 | + x2 = pymbolic.var("x2") |
| 223 | + x1 = pymbolic.var("x1") |
| 224 | + x0 = pymbolic.var("x0") |
| 225 | + domains += [ |
| 226 | + f"{{[{x1}]: 0<={x1}<=1}}", |
| 227 | + f"{{[{x0}]: 0<={x0}<=1 and {x0}<order-{x1} }}", |
| 228 | + f"{{[{x2}]: 0<={x2}<=order-{x0}-{x1} }}", |
| 229 | + ] |
| 230 | + expr = (2*x2 - 1) * b[2] * temp[x0, x1, x2 - 1] + (x2 - 1)*(x2 - 1) \ |
| 231 | + * temp[x0, x1, x2 - 2] |
| 232 | + expr += prim.If(prim.Comparison(x0, ">=", 1), |
| 233 | + 2*x0*b[0]*temp[x0 - 1, x1, x2], 0) |
| 234 | + expr += prim.If(prim.Comparison(x1, ">=", 1), |
| 235 | + 2*x1*b[1]*temp[x0, x1 - 1, x2], 0) |
| 236 | + expr *= -inv_r2 |
| 237 | + insns += [ |
| 238 | + lp.Assignment( |
| 239 | + id=f"temp_{x0}_{x1}_{x2}", |
| 240 | + assignee=temp[x0, x1, x2], |
| 241 | + expression=-inv_r2, |
| 242 | + depends_on=depends_on, |
| 243 | + predicates=frozenset([prim.Comparison(x2, ">=", 2)]), |
| 244 | + ), |
| 245 | + lp.Assignment( |
| 246 | + id=f"update_{x0}_{x1}_{x2}", |
| 247 | + assignee=result[0], |
| 248 | + expression=(result[0] + coeffs[wrangler.get_storage_index([x0, x1, x2])] |
| 249 | + * temp[x0, x1, x2]), |
| 250 | + depends_on=frozenset([f"temp_{x0}_{x1}_{x2}"]) |
| 251 | + ) |
| 252 | + ] |
| 253 | + depends_on = frozenset([f"update_{x0}_{x1}_{x2}"]) |
| 254 | + |
| 255 | + # For x1 >=2 |
| 256 | + x2 = pymbolic.var("y2") |
| 257 | + x1 = pymbolic.var("y1") |
| 258 | + x0 = pymbolic.var("y0") |
| 259 | + domains += [ |
| 260 | + f"{{[{x1}]: 2<={x1}<=order}}", |
| 261 | + f"{{[{x0}]: 0<={x0}<=1 and {x0}<order-{x1} }}", |
| 262 | + f"{{[{x2}]: 0<={x2}<=order-{x0}-{x1} }}", |
| 263 | + ] |
| 264 | + expr = ((2*x1 - 1) * b[1] * temp[x0, (x1 - 1) % 2, x2] + (x1 - 1)*(x1 - 1) |
| 265 | + * temp[x0, (x1 - 2) % 2, x2]) |
| 266 | + expr += prim.If(prim.Comparison(x0, ">=", 1), |
| 267 | + 2*x0*b[0]*temp[x0 - 1, x1 % 2, x2], 0) |
| 268 | + expr += prim.If(prim.Comparison(x2, ">=", 1), |
| 269 | + 2*x2*b[2]*temp[x0, x1 % 2, x2 - 1], 0) |
| 270 | + expr += prim.If(prim.Comparison(x2, ">=", 2), |
| 271 | + 2*b[2]*temp[x0, x1 % 2, x2 - 1], 0) |
| 272 | + expr *= -inv_r2 |
| 273 | + insns += [ |
| 274 | + lp.Assignment( |
| 275 | + id=f"temp_{x0}_{x1}_{x2}", |
| 276 | + assignee=temp[x0, x1 % 2, x2], |
| 277 | + expression=-inv_r2, |
| 278 | + depends_on=depends_on, |
| 279 | + ), |
| 280 | + lp.Assignment( |
| 281 | + id=f"update_{x0}_{x1}_{x2}", |
| 282 | + assignee=result[0], |
| 283 | + expression=(result[0] + coeffs[wrangler.get_storage_index([x0, x1, x2])] |
| 284 | + * temp[x0, x1 % 2, x2]), |
| 285 | + depends_on=frozenset([f"temp_{x0}_{x1}_{x2}"]) |
| 286 | + ) |
| 287 | + ] |
| 288 | + depends_on = frozenset([f"update_{x0}_{x1}_{x2}"]) |
| 289 | + |
| 290 | + loopy_knl = lp.make_function(domains, insns, |
| 291 | + kernel_data=[ |
| 292 | + lp.GlobalArg("result", shape=(len(kernels),), is_input=True, |
| 293 | + is_output=True), |
| 294 | + lp.GlobalArg("coeffs", |
| 295 | + shape=(ncoeffs,), is_input=True, is_output=False), |
| 296 | + lp.GlobalArg("center, target", |
| 297 | + shape=(dim,), is_input=True, is_output=False), |
| 298 | + lp.ValueArg("rscale", is_input=True), |
| 299 | + lp.ValueArg("itgt", is_input=True), |
| 300 | + lp.ValueArg("ntargets", is_input=True), |
| 301 | + lp.GlobalArg("targets", |
| 302 | + shape=(dim, "ntargets"), is_input=True, is_output=False), |
| 303 | + lp.TemporaryVariable("temp"), |
| 304 | + ...], |
| 305 | + name="e2p", |
| 306 | + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, |
| 307 | + fixed_parameters={"dim": dim, "order": order}, |
| 308 | + ) |
| 309 | + |
| 310 | + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") |
| 311 | + loopy_knl = lp.tag_inames(loopy_knl, "x0*:unr") |
| 312 | + loopy_knl = lp.tag_inames(loopy_knl, "x1*:unr") |
| 313 | + loopy_knl = lp.tag_inames(loopy_knl, "y0*:unr") |
| 314 | + for kernel in kernels: |
| 315 | + loopy_knl = kernel.prepare_loopy_kernel(loopy_knl) |
| 316 | + |
| 317 | + loopy_knl = lp.simplify_indices(loopy_knl) |
| 318 | + |
| 319 | + optimizations = [] |
| 320 | + |
| 321 | + return loopy_knl, optimizations |
| 322 | + |
| 323 | + |
137 | 324 | def make_p2e_loopy_kernel( |
138 | 325 | expansion: ExpansionBase, kernels: Sequence[Kernel], |
139 | 326 | strength_usage: Sequence[int], nstrengths: int) -> lp.TranslationUnit: |
|
0 commit comments