Merge pull request numba#8535 from apmasell/llvm14

LLVM14
Deezy33-creator · Mar 13, 2023 · c66a986 · c66a986
2 parents ce69f30 + 03cf859
commit c66a986
Show file tree

Hide file tree

Showing 11 changed files with 96 additions and 56 deletions.
diff --git a/buildscripts/condarecipe.local/meta.yaml b/buildscripts/condarecipe.local/meta.yaml
@@ -34,7 +34,7 @@ requirements:
     - setuptools
     - importlib_metadata       # [py<39]
     # On channel https://anaconda.org/numba/
-    - llvmlite >=0.40.0dev0,<0.40
+    - llvmlite >=0.40,<0.41
     # TBB devel version is to match TBB libs.
     # 2020.3 is the last version with the "old" ABI
     # NOTE: 2021.1..2021.5 are API compatible for Numba's purposes.

diff --git a/numba/core/callconv.py b/numba/core/callconv.py
@@ -9,7 +9,7 @@
 from llvmlite import ir
 
 from numba.core import types, cgutils
-from numba.core.base import PYOBJECT, GENERIC_POINTER
+from numba.core.base import GENERIC_POINTER
 
 
 TryStatus = namedtuple('TryStatus', ['in_try', 'excinfo'])
@@ -90,7 +90,7 @@ def return_native_none(self, builder):
         self._return_errcode_raw(builder, RETCODE_NONE)
 
     def return_exc(self, builder):
-        self._return_errcode_raw(builder, RETCODE_EXC, mark_exc=True)
+        self._return_errcode_raw(builder, RETCODE_EXC)
 
     def return_stop_iteration(self, builder):
         self._return_errcode_raw(builder, RETCODE_STOPIT)
@@ -207,12 +207,12 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None,
 
         call_helper = self._get_call_helper(builder)
         exc_id = call_helper._add_exception(exc, exc_args, locinfo)
-        self._return_errcode_raw(builder, _const_int(exc_id), mark_exc=True)
+        self._return_errcode_raw(builder, _const_int(exc_id))
 
     def return_status_propagate(self, builder, status):
         self._return_errcode_raw(builder, status.code)
 
-    def _return_errcode_raw(self, builder, code, mark_exc=False):
+    def _return_errcode_raw(self, builder, code):
         if isinstance(code, int):
             code = _const_int(code)
         builder.ret(code)
@@ -395,7 +395,9 @@ def set_static_user_exc(self, builder, exc, exc_args=None, loc=None,
         exc = (exc, exc_args, locinfo)
         struct_gv = pyapi.serialize_object(exc)
         excptr = self._get_excinfo_argument(builder.function)
-        builder.store(struct_gv, excptr)
+        store = builder.store(struct_gv, excptr)
+        md = builder.module.add_metadata([ir.IntType(1)(1)])
+        store.set_metadata("numba_exception_output", md)
 
     def return_user_exc(self, builder, exc, exc_args=None, loc=None,
                         func_name=None):
@@ -409,7 +411,7 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None,
             builder.branch(try_info['target'])
         else:
             # Return from the current function
-            self._return_errcode_raw(builder, RETCODE_USEREXC, mark_exc=True)
+            self._return_errcode_raw(builder, RETCODE_USEREXC)
 
     def _get_try_state(self, builder):
         try:
@@ -457,14 +459,10 @@ def return_status_propagate(self, builder, status):
         excptr = self._get_excinfo_argument(builder.function)
         builder.store(status.excinfoptr, excptr)
         with builder.if_then(builder.not_(trystatus.in_try)):
-            self._return_errcode_raw(builder, status.code, mark_exc=True)
+            self._return_errcode_raw(builder, status.code)
 
-    def _return_errcode_raw(self, builder, code, mark_exc=False):
-        ret = builder.ret(code)
-
-        if mark_exc:
-            md = builder.module.add_metadata([ir.IntType(1)(1)])
-            ret.set_metadata("ret_is_raise", md)
+    def _return_errcode_raw(self, builder, code):
+        builder.ret(code)
 
     def _get_return_status(self, builder, code, excinfoptr):
         """

diff --git a/numba/core/codegen.py b/numba/core/codegen.py
@@ -234,7 +234,7 @@ def init_digraph(name, fname, fontsize):
         nrt_meminfo = re.compile("@NRT_MemInfo")
         ll_intrin_calls = re.compile(r".*call.*@llvm\..*")
         ll_function_call = re.compile(r".*call.*@.*")
-        ll_raise = re.compile(r"ret i32.*\!ret_is_raise.*")
+        ll_raise = re.compile(r"store .*\!numba_exception_output.*")
         ll_return = re.compile("ret i32 [^1],?.*")
 
         # wrapper function for line wrapping LLVM lines
@@ -1215,14 +1215,21 @@ def _module_pass_manager(self, **kwargs):
             # This knocks loops into rotated form early to reduce the likelihood
             # of vectorization failing due to unknown PHI nodes.
             pm.add_loop_rotate_pass()
-            # LLVM 11 added LFTR to the IV Simplification pass, this interacted
-            # badly with the existing use of the InstructionCombiner here and
-            # ended up with PHI nodes that prevented vectorization from
-            # working. The desired vectorization effects can be achieved
-            # with this in LLVM 11 (and also < 11) but at a potentially
-            # slightly higher cost:
-            pm.add_licm_pass()
-            pm.add_cfg_simplification_pass()
+            if ll.llvm_version_info[0] < 12:
+                # LLVM 11 added LFTR to the IV Simplification pass,
+                # this interacted badly with the existing use of the
+                # InstructionCombiner here and ended up with PHI nodes that
+                # prevented vectorization from working. The desired
+                # vectorization effects can be achieved with this in LLVM 11
+                # (and also < 11) but at a potentially slightly higher cost:
+                pm.add_licm_pass()
+                pm.add_cfg_simplification_pass()
+            else:
+                # These passes are required to get SVML to vectorize tests
+                # properly on LLVM 14
+                pm.add_instruction_combining_pass()
+                pm.add_jump_threading_pass()
+
         if config.LLVM_REFPRUNE_PASS:
             pm.add_refprune_pass(_parse_refprune_flags())
         return pm

diff --git a/numba/misc/llvm_pass_timings.py b/numba/misc/llvm_pass_timings.py
@@ -52,6 +52,7 @@ def get(self):
         "wall_time",
         "wall_percent",
         "pass_name",
+        "instruction",
     ],
 )
 
@@ -216,6 +217,7 @@ def parse(raw_data):
                 "System Time": "system",
                 "User+System": "user_system",
                 "Wall Time": "wall",
+                "Instr": "instruction",
                 "Name": "pass_name",
             }
             for ln in line_iter:
@@ -229,17 +231,22 @@ def parse(raw_data):
             assert headers[-1] == 'pass_name'
             # compute the list of available attributes from the column headers
             attrs = []
+            n = r"\s*((?:[0-9]+\.)?[0-9]+)"
+            pat = ""
             for k in headers[:-1]:
-                attrs.append(f"{k}_time")
-                attrs.append(f"{k}_percent")
+                if k == "instruction":
+                    pat += n
+                else:
+                    attrs.append(f"{k}_time")
+                    attrs.append(f"{k}_percent")
+                    pat += f"\\s+(?:{n}\\s*\\({n}%\\)|-+)"
+
             # put default value 0.0 to all missing attributes
             missing = {}
             for k in PassTimingRecord._fields:
                 if k not in attrs and k != 'pass_name':
                     missing[k] = 0.0
             # parse timings
-            n = r"\s*((?:[0-9]+\.)?[0-9]+)"
-            pat = f"\\s+(?:{n}\\s*\\({n}%\\)|-+)" * (len(headers) - 1)
             pat += r"\s*(.*)"
             for ln in line_iter:
                 m = re.match(pat, ln)

diff --git a/numba/tests/test_array_reductions.py b/numba/tests/test_array_reductions.py
@@ -1000,6 +1000,8 @@ def assert_raises(arr, axis):
         assert_raises(arr1d, -2)
         assert_raises(arr2d, -3)
         assert_raises(arr2d, 2)
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_argmax_axis_must_be_integer(self):
         arr = np.arange(6)
@@ -1073,6 +1075,9 @@ def assert_raises(arr, axis):
         assert_raises(arr2d, -3)
         assert_raises(arr2d, 2)
 
+        # Exceptions leak references
+        self.disable_leak_check()
+
     def test_argmin_axis_must_be_integer(self):
         arr = np.arange(6)
 

diff --git a/numba/tests/test_debuginfo.py b/numba/tests/test_debuginfo.py
@@ -184,7 +184,7 @@ def test_DILocation(self):
         @njit(debug=True, error_model='numpy')
         def foo(a):
             b = a + 1.23
-            c = a * 2.34
+            c = b * 2.34
             d = b / c
             print(d)
             return d
@@ -223,9 +223,16 @@ def foo(a):
 
         # Find non-call instr and check the sequence is as expected
         instrs = [x for x in block.instructions if x.opcode != 'call']
-        op_seq = [x.opcode for x in instrs]
-        op_expect = ('fadd', 'fmul', 'fdiv')
-        self.assertIn(''.join(op_expect), ''.join(op_seq))
+        op_expect = {'fadd', 'fmul', 'fdiv'}
+        started = False
+        for x in instrs:
+            if x.opcode in op_expect:
+                op_expect.remove(x.opcode)
+                if not started:
+                    started = True
+            elif op_expect and started:
+                self.fail("Math opcodes are not contiguous")
+        self.assertFalse(op_expect, "Math opcodes were not found")
 
         # Parse out metadata from end of each line, check it monotonically
         # ascends with LLVM source line. Also store all the dbg references,

diff --git a/numba/tests/test_np_functions.py b/numba/tests/test_np_functions.py
@@ -546,6 +546,8 @@ def test_sinc_exceptions(self):
             cfunc('str')
         self.assertIn('Argument "x" must be a Number or array-like',
                       str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_contains(self):
         def arrs():
@@ -650,6 +652,8 @@ def test_angle_exceptions(self):
             cfunc('hello')
         self.assertIn('Argument "z" must be a complex or Array[complex]',
                       str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_array_equal(self):
         def arrays():
@@ -777,6 +781,8 @@ def test_np_append_exceptions(self):
             'The third argument "axis" must be an integer',
             str(raises.exception)
         )
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_delete(self):
 
@@ -840,6 +846,8 @@ def test_delete_exceptions(self):
             'obj must be less than the len(arr)',
             str(raises.exception),
         )
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def diff_arrays(self):
         """
@@ -893,6 +901,8 @@ def test_diff2_exceptions(self):
             with self.assertRaises(ValueError) as raises:
                 cfunc(arr, n)
             self.assertIn("order must be non-negative", str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_isscalar(self):
         def values():
@@ -1097,6 +1107,8 @@ def test_bincount1_exceptions(self):
             cfunc([2, -1])
         self.assertIn("first argument must be non-negative",
                       str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_bincount2(self):
         pyfunc = bincount2
@@ -5299,6 +5311,8 @@ def not_literal_axis(a, i, axis):
         with self.assertRaises(ValueError) as raises:
             gen(0)(arr2d, np.ones((2, 3), dtype=np.uint64))
         self.assertIn("dimensions don't match", str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_nan_to_num(self):
         # Test cases are from

diff --git a/numba/tests/test_np_randomgen.py b/numba/tests/test_np_randomgen.py
@@ -1097,6 +1097,8 @@ def test_noncentral_chisquare(self):
             curr_args[2] = -1
             nb_dist_func(*curr_args)
         self.assertIn('nonc < 0', str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_noncentral_f(self):
         # For this test dtype argument is never used, so we pass [None] as dtype
@@ -1141,6 +1143,8 @@ def test_noncentral_f(self):
             curr_args[3] = -1
             nb_dist_func(*curr_args)
         self.assertIn('nonc < 0', str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_logseries(self):
         # For this test dtype argument is never used, so we pass [None] as dtype
@@ -1173,6 +1177,8 @@ def test_logseries(self):
                 curr_args[1] = _p
                 nb_dist_func(*curr_args)
             self.assertIn('p < 0, p >= 1 or p is NaN', str(raises.exception))
+        # Exceptions leak references
+        self.disable_leak_check()
 
 
 class TestGeneratorCaching(TestCase, SerialMixin):

diff --git a/numba/tests/test_parfors.py b/numba/tests/test_parfors.py
@@ -415,7 +415,7 @@ def _get_fast_instructions(ir):
             return fast_inst
 
         def _assert_fast(instrs):
-            ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp')
+            ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call')
             for inst in instrs:
                 count = 0
                 for op in ops:
@@ -4462,11 +4462,6 @@ def get_gufunc_asm(self, func, schedule_type, *args, **kwargs):
 
             return asm
 
-    # this is a common match pattern for something like:
-    # \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n
-    # to check vsqrtpd operates on zmm
-    match_vsqrtpd_on_zmm = re.compile('\n\s+vsqrtpd\s+.*zmm.*\n')
-
     @linux_only
     def test_vectorizer_fastmath_asm(self):
         """ This checks that if fastmath is set and the underlying hardware
@@ -4490,22 +4485,19 @@ def will_vectorize(A):
                                        fastmath=True)
         slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                        fastmath=False)
-
         for v in fast_asm.values():
             # should unwind and call vector sqrt then vector add
             # all on packed doubles using zmm's
             self.assertTrue('vaddpd' in v)
-            self.assertTrue('vsqrtpd' in v)
+            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
             self.assertTrue('zmm' in v)
-            # make sure vsqrtpd operates on zmm
-            self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)
 
         for v in slow_asm.values():
             # vector variants should not be present
             self.assertTrue('vaddpd' not in v)
             self.assertTrue('vsqrtpd' not in v)
             # check scalar variant is present
-            self.assertTrue('vsqrtsd' in v)
+            self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v)
             self.assertTrue('vaddsd' in v)
             # check no zmm addressing is present
             self.assertTrue('zmm' not in v)
@@ -4550,11 +4542,9 @@ def will_vectorize(A):
         for v in vec_asm.values():
             # should unwind and call vector sqrt then vector mov
             # all on packed doubles using zmm's
-            self.assertTrue('vsqrtpd' in v)
+            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
             self.assertTrue('vmovupd' in v)
             self.assertTrue('zmm' in v)
-            # make sure vsqrtpd operates on zmm
-            self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)
 
     @linux_only
     # needed as 32bit doesn't have equivalent signed/unsigned instruction

diff --git a/numba/tests/test_refop_pruning.py b/numba/tests/test_refop_pruning.py
@@ -118,6 +118,22 @@ def func(n):
                     raise ValueError
             return x
 
+        with set_refprune_flags('per_bb,fanout'):
+            self.check(func, (types.intp), basicblock=True, diamond=False,
+                       fanout=True, fanout_raise=False)
+
+    def test_fanout_3(self):
+        # fanout with raise
+        def func(n):
+            ary = np.arange(n)
+            # basically an impl of array.sum
+            c = 0
+            # The raise is from StopIteration of next(iterator) implicit in
+            # the for loop
+            for v in np.nditer(ary):
+                c += v.item()
+            return 1
+
         with set_refprune_flags('per_bb,fanout_raise'):
             self.check(func, (types.intp), basicblock=True, diamond=False,
                        fanout=False, fanout_raise=True)