@@ -126,7 +126,7 @@ def default_name(self):
126126 def get_kernel (self , max_ntargets_in_one_box ):
127127 ncoeffs = len (self .expansion )
128128 loopy_args = self .get_loopy_args ()
129- max_work_items = min (32 , max (ncoeffs , max_ntargets_in_one_box ))
129+ max_work_items = min (256 , max (ncoeffs , max_ntargets_in_one_box ))
130130
131131 loopy_knl = lp .make_kernel (
132132 [
@@ -211,8 +211,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
211211 inner_knl , optimizations = self .get_cached_loopy_knl_and_optimizations ()
212212 knl = self .get_kernel (max_ntargets_in_one_box = max_ntargets_in_one_box )
213213 knl = lp .tag_inames (knl , {"itgt_box" : "g.0" })
214- knl = lp .split_iname (knl , "itgt_offset" , 32 , inner_tag = "l.0" )
215- knl = lp .split_iname (knl , "icoeff" , 32 , inner_tag = "l.0" )
214+ knl = lp .split_iname (knl , "itgt_offset" , 256 , inner_tag = "l.0" )
215+ knl = lp .split_iname (knl , "icoeff" , 256 , inner_tag = "l.0" )
216216 knl = lp .add_inames_to_insn (knl , "dummy" ,
217217 "id:fetch_init* or id:fetch_center or id:kernel_scaling" )
218218 knl = lp .add_inames_to_insn (knl , "itgt_box" , "id:kernel_scaling" )
@@ -276,7 +276,7 @@ def default_name(self):
276276 def get_kernel (self , max_ntargets_in_one_box ):
277277 ncoeffs = len (self .expansion )
278278 loopy_args = self .get_loopy_args ()
279- max_work_items = min (32 , max (ncoeffs , max_ntargets_in_one_box ))
279+ max_work_items = min (256 , max (ncoeffs , max_ntargets_in_one_box ))
280280
281281 loopy_knl = lp .make_kernel (
282282 [
@@ -383,8 +383,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
383383 knl = lp .tag_inames (knl , {"itgt_box" : "g.0" , "dummy" : "l.0" })
384384 knl = lp .unprivatize_temporaries_with_inames (knl ,
385385 "itgt_offset" , "result_temp" )
386- knl = lp .split_iname (knl , "itgt_offset" , 32 , inner_tag = "l.0" )
387- knl = lp .split_iname (knl , "icoeff" , 32 , inner_tag = "l.0" )
386+ knl = lp .split_iname (knl , "itgt_offset" , 256 , inner_tag = "l.0" )
387+ knl = lp .split_iname (knl , "icoeff" , 256 , inner_tag = "l.0" )
388388 knl = lp .privatize_temporaries_with_inames (knl ,
389389 "itgt_offset_outer" , "result_temp" )
390390 knl = lp .duplicate_inames (knl , "itgt_offset_outer" , "id:init_result" )
0 commit comments