From 985cbd4d93a2e589dff13bef58b276799715bab3 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Wed, 12 May 2021 11:10:08 +0100
Subject: [PATCH 01/45] CUDA: Find libraries from NVIDIA CUDA conda packages

The NVIDIA CUDA conda packages place NVVM in `${CONDA_PREFIX}/nvvm`, so we
need a separate mechanism to detect libraries when this package is
installed. Its priority is placed just ahead of the System toolkit, so that
a user can use different toolkits by installing the NVIDIA conda packages
in their environments. The order between the normal (e.g. Anaconda,
conda-forge) and NVIDIA conda packages is not important, since one wouldn't
install multiple toolkits into one environment.

NVIDIA CUDA conda packages can be installed with:

```
conda install nvidia::cuda
```

(from the NVIDIA Anaconda channel)
---
 numba/cuda/cuda_paths.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py
index b9988bc317b..5d3f7190cc7 100644
--- a/numba/cuda/cuda_paths.py
+++ b/numba/cuda/cuda_paths.py
@@ -25,6 +25,7 @@ def _find_valid_path(options):
 def _get_libdevice_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
+        ('Conda environment (NVIDIA package)', get_nvidia_libdevice_ctk()),
         ('CUDA_HOME', get_cuda_home('nvvm', 'libdevice')),
         ('System', get_system_ctk('nvvm', 'libdevice')),
         ('Debian package', get_debian_pkg_libdevice()),
@@ -45,6 +46,7 @@ def _nvvm_lib_dir():
 def _get_nvvm_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
+        ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()),
         ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())),
         ('System', get_system_ctk(*_nvvm_lib_dir())),
     ]
@@ -80,6 +82,7 @@ def _cudalib_path():
 def _get_cudalib_dir_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
+        ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()),
        ('CUDA_HOME', get_cuda_home(_cudalib_path())),
         ('System', get_system_ctk(_cudalib_path())),
     ]
@@ -118,6 +121,43 @@ def get_conda_ctk():
     return os.path.dirname(max(paths))


+def get_nvidia_nvvm_ctk():
+    """Return path to directory containing the NVVM shared library.
+    """
+    is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
+    if not is_conda_env:
+        return
+    # Assume the existence of NVVM to imply cudatoolkit installed
+    libdir = os.path.join(sys.prefix, 'nvvm', 'lib64')
+    if not os.path.exists(libdir) or not os.path.isdir(libdir):
+        return
+    paths = find_lib('nvvm', libdir=libdir)
+    if not paths:
+        return
+    # Use the directory name of the max path
+    return os.path.dirname(max(paths))
+
+
+def get_nvidia_libdevice_ctk():
+    """Return path to directory containing the libdevice library.
+    """
+    nvvm_ctk = get_nvidia_nvvm_ctk()
+    if not nvvm_ctk:
+        return
+    nvvm_dir = os.path.dirname(nvvm_ctk)
+    return os.path.join(nvvm_dir, 'libdevice')
+
+
+def get_nvidia_cudalib_ctk():
+    """Return path to directory containing the shared libraries of cudatoolkit.
+    """
+    nvvm_ctk = get_nvidia_nvvm_ctk()
+    if not nvvm_ctk:
+        return
+    env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
+    return os.path.join(env_dir, 'lib')
+
+
 def get_cuda_home(*subdirs):
     """Get paths of CUDA_HOME.
If *subdirs* are the subdirectory name to be appended in the resulting From b735fd3889fcf13c9a6114c7fc26f462c662c851 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 23 Jul 2021 16:03:30 +0100 Subject: [PATCH 02/45] CUDA: Add libcuda.so to libs.test() It is useful to show the driver location and test loading it in libs.test() - this commit adds this functionality, with a couple of limitations: - The list of candidates can be shown, but it is not easy to state the exact path of all candidates, because the loader's search order is not easy to access. - If the loading succeeds, the exact path of the loaded library may not be visible (e.g. it may just show that loading succeeded from "libcuda.so"). That said, this should give some insight into the loading of the library and whether it succeeds, so this seems worthwhile for now - perhaps more sophisticated interaction with the OS to determine exact locations could be added later. Some modification to the `find_driver()` function in driver.py was needed to expose the relevant information to `libs.test()`. --- numba/cuda/cudadrv/driver.py | 15 +++++++++++++-- numba/cuda/cudadrv/libs.py | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/numba/cuda/cudadrv/driver.py b/numba/cuda/cudadrv/driver.py index 348c9e09f3e..94fad7ec22e 100644 --- a/numba/cuda/cudadrv/driver.py +++ b/numba/cuda/cudadrv/driver.py @@ -88,7 +88,7 @@ def __str__(self): return "[%s] %s" % (self.code, self.msg) -def find_driver(): +def locate_driver_and_loader(): envpath = config.CUDA_DRIVER @@ -128,6 +128,11 @@ def find_driver(): candidates = dlnames + [os.path.join(x, y) for x, y in product(dldir, dlnames)] + return dlloader, candidates + + +def load_driver(dlloader, candidates): + # Load the driver; Collect driver error information path_not_exist = [] driver_load_error = [] @@ -140,7 +145,7 @@ def find_driver(): path_not_exist.append(not os.path.isfile(path)) driver_load_error.append(e) else: - return dll + return dll, path # Problem loading driver if all(path_not_exist): @@ -150,6 +155,12 @@ def find_driver(): _raise_driver_error(errmsg) +def find_driver(): + dlloader, candidates = locate_driver_and_loader() + dll, path = load_driver(dlloader, candidates) + return dll + + DRIVER_NOT_FOUND_MSG = """ CUDA driver library cannot be found. 
 If you are sure that a CUDA driver is installed,
diff --git a/numba/cuda/cudadrv/libs.py b/numba/cuda/cudadrv/libs.py
index 3bd80406dc9..704f98004d4 100644
--- a/numba/cuda/cudadrv/libs.py
+++ b/numba/cuda/cudadrv/libs.py
@@ -15,6 +15,9 @@
 from numba.misc.findlib import find_lib
 from numba.cuda.cuda_paths import get_cuda_paths
+from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
+from numba.cuda.cudadrv.error import CudaSupportError
+

 if sys.platform == 'win32':
     _dllnamepattern = '%s.dll'
@@ -80,6 +83,19 @@ def test(_platform=None, print_paths=True):
     """
     failed = False

+    # Check for the driver
+    try:
+        dlloader, candidates = locate_driver_and_loader()
+        locations = ", ".join(candidates)
+        print(f'Finding driver from candidates: {locations}...')
+        print(f'Using loader {dlloader}')
+        print('\ttrying to load driver', end='...')
+        dll, path = load_driver(dlloader, candidates)
+        print(f'\tok, loaded from {path}')
+    except CudaSupportError as e:
+        print(f'\tERROR: failed to open driver: {e}')
+        failed = True
+
     # Checks for dynamic libraries
     libs = 'nvvm cudart'.split()
     for lib in libs:

From 73dfc4d6114b0c88592838cd9d10c79a06ab81a5 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Tue, 27 Jul 2021 11:48:58 +0100
Subject: [PATCH 03/45] Fix DLL locations on Windows

---
 numba/cuda/cuda_paths.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py
index 5d3f7190cc7..c0b3ab4938b 100644
--- a/numba/cuda/cuda_paths.py
+++ b/numba/cuda/cuda_paths.py
@@ -128,7 +128,10 @@ def get_nvidia_nvvm_ctk():
     if not is_conda_env:
         return
     # Assume the existence of NVVM to imply cudatoolkit installed
-    libdir = os.path.join(sys.prefix, 'nvvm', 'lib64')
+    if sys.platform.startswith('linux'):
+        libdir = os.path.join(sys.prefix, 'nvvm', 'lib64')
+    else:
+        libdir = os.path.join(sys.prefix, 'nvvm', 'bin')
     if not os.path.exists(libdir) or not os.path.isdir(libdir):
         return
     paths = find_lib('nvvm', libdir=libdir)
@@ -155,7 +158,11 @@ def get_nvidia_cudalib_ctk():
     if not nvvm_ctk:
         return
     env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
-    return os.path.join(env_dir, 'lib')
+    if sys.platform.startswith('linux'):
+        libdir = 'lib'
+    else:
+        libdir = 'bin'
+    return os.path.join(env_dir, libdir)


 def get_cuda_home(*subdirs):

From 5d0306fc34bdda6042c0bbf11e3ba69f04ed95c6 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Tue, 27 Jul 2021 12:41:58 +0100
Subject: [PATCH 04/45] Re-use _cudalib_path() instead of re-implementing logic

---
 numba/cuda/cuda_paths.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py
index c0b3ab4938b..e5d22d53a75 100644
--- a/numba/cuda/cuda_paths.py
+++ b/numba/cuda/cuda_paths.py
@@ -46,7 +46,7 @@ def _nvvm_lib_dir():
 def _get_nvvm_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
-        ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()),
+        ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk(_cudalib_path())),
         ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())),
         ('System', get_system_ctk(*_nvvm_lib_dir())),
     ]
@@ -82,7 +82,7 @@ def _cudalib_path():
 def _get_cudalib_dir_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
-        ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()),
+        ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk(_cudalib_path())),
         ('CUDA_HOME', get_cuda_home(_cudalib_path())),
         ('System', get_system_ctk(_cudalib_path())),
     ]
@@ -121,17 +121,14 @@ def get_conda_ctk():
     return os.path.dirname(max(paths))


-def get_nvidia_nvvm_ctk():
+def get_nvidia_nvvm_ctk(*subdirs):
     """Return path to directory containing the NVVM shared library.
     """
     is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
     if not is_conda_env:
         return
     # Assume the existence of NVVM to imply cudatoolkit installed
-    if sys.platform.startswith('linux'):
-        libdir = os.path.join(sys.prefix, 'nvvm', 'lib64')
-    else:
-        libdir = os.path.join(sys.prefix, 'nvvm', 'bin')
+    libdir = os.path.join(sys.prefix, 'nvvm', *subdirs)
     if not os.path.exists(libdir) or not os.path.isdir(libdir):
         return
     paths = find_lib('nvvm', libdir=libdir)
@@ -144,25 +141,21 @@ def get_nvidia_nvvm_ctk():
 def get_nvidia_libdevice_ctk():
     """Return path to directory containing the libdevice library.
     """
-    nvvm_ctk = get_nvidia_nvvm_ctk()
+    nvvm_ctk = get_nvidia_nvvm_ctk(_cudalib_path())
     if not nvvm_ctk:
         return
     nvvm_dir = os.path.dirname(nvvm_ctk)
     return os.path.join(nvvm_dir, 'libdevice')


-def get_nvidia_cudalib_ctk():
+def get_nvidia_cudalib_ctk(*subdirs):
     """Return path to directory containing the shared libraries of cudatoolkit.
     """
-    nvvm_ctk = get_nvidia_nvvm_ctk()
+    nvvm_ctk = get_nvidia_nvvm_ctk(_cudalib_path())
     if not nvvm_ctk:
         return
     env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
-    if sys.platform.startswith('linux'):
-        libdir = 'lib'
-    else:
-        libdir = 'bin'
-    return os.path.join(env_dir, libdir)
+    return os.path.join(env_dir, *subdirs)


 def get_cuda_home(*subdirs):

From cc7a783d5757d854fb48298d9f317783ba869725 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Tue, 27 Jul 2021 12:46:44 +0100
Subject: [PATCH 05/45] Don't pass so many paths around

---
 numba/cuda/cuda_paths.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py
index e5d22d53a75..1c06a821d45 100644
--- a/numba/cuda/cuda_paths.py
+++ b/numba/cuda/cuda_paths.py
@@ -46,7 +46,7 @@ def _nvvm_lib_dir():
 def _get_nvvm_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
-        ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk(_cudalib_path())),
+        ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()),
         ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())),
         ('System', get_system_ctk(*_nvvm_lib_dir())),
     ]
@@ -82,7 +82,7 @@ def _cudalib_path():
 def _get_cudalib_dir_path_decision():
     options = [
         ('Conda environment', get_conda_ctk()),
-        ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk(_cudalib_path())),
+        ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()),
         ('CUDA_HOME', get_cuda_home(_cudalib_path())),
         ('System', get_system_ctk(_cudalib_path())),
     ]
@@ -121,14 +121,14 @@ def get_conda_ctk():
     return os.path.dirname(max(paths))


-def get_nvidia_nvvm_ctk(*subdirs):
+def get_nvidia_nvvm_ctk():
     """Return path to directory containing the NVVM shared library.
     """
     is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
     if not is_conda_env:
         return
     # Assume the existence of NVVM to imply cudatoolkit installed
-    libdir = os.path.join(sys.prefix, 'nvvm', *subdirs)
+    libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path())
     if not os.path.exists(libdir) or not os.path.isdir(libdir):
         return
     paths = find_lib('nvvm', libdir=libdir)
@@ -141,21 +141,21 @@ def get_nvidia_nvvm_ctk():
 def get_nvidia_libdevice_ctk():
     """Return path to directory containing the libdevice library.
""" - nvvm_ctk = get_nvidia_nvvm_ctk(_cudalib_path()) + nvvm_ctk = get_nvidia_nvvm_ctk() if not nvvm_ctk: return nvvm_dir = os.path.dirname(nvvm_ctk) return os.path.join(nvvm_dir, 'libdevice') -def get_nvidia_cudalib_ctk(*subdirs): +def get_nvidia_cudalib_ctk(): """Return path to directory containing the shared libraries of cudatoolkit. """ - nvvm_ctk = get_nvidia_nvvm_ctk(_cudalib_path()) + nvvm_ctk = get_nvidia_nvvm_ctk() if not nvvm_ctk: return env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, *subdirs) + return os.path.join(env_dir, _cudalib_path()) def get_cuda_home(*subdirs): From 474162768cf056b182caf8fb893670dba0d628e1 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 27 Jul 2021 13:01:04 +0100 Subject: [PATCH 06/45] Fix finding of cudadevrt in both types of conda env on Windows --- numba/cuda/cuda_paths.py | 27 +++++++++++++++++++++++++++ numba/cuda/cudadrv/libs.py | 10 ++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py index 1c06a821d45..3636480738b 100644 --- a/numba/cuda/cuda_paths.py +++ b/numba/cuda/cuda_paths.py @@ -90,11 +90,27 @@ def _get_cudalib_dir_path_decision(): return by, libdir +def _get_static_cudalib_dir_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), + ('CUDA_HOME', get_cuda_home(_cudalib_path())), + ('System', get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + def _get_cudalib_dir(): by, libdir = _get_cudalib_dir_path_decision() return _env_path_tuple(by, libdir) +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + def get_system_ctk(*subdirs): """Return path to system-wide cudatoolkit; or, None if it doesn't exist. """ @@ -158,6 +174,16 @@ def get_nvidia_cudalib_ctk(): return os.path.join(env_dir, _cudalib_path()) +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, 'Lib', 'x64') + + def get_cuda_home(*subdirs): """Get paths of CUDA_HOME. 
If *subdirs* are the subdirectory name to be appended in the resulting @@ -198,6 +224,7 @@ def get_cuda_paths(): 'nvvm': _get_nvvm_path(), 'libdevice': _get_libdevice_paths(), 'cudalib_dir': _get_cudalib_dir(), + 'static_cudalib_dir': _get_static_cudalib_dir(), } # Cache result get_cuda_paths._cached_result = d diff --git a/numba/cuda/cudadrv/libs.py b/numba/cuda/cudadrv/libs.py index 704f98004d4..8c5b6f4feeb 100644 --- a/numba/cuda/cudadrv/libs.py +++ b/numba/cuda/cudadrv/libs.py @@ -51,7 +51,8 @@ def get_cudalib(lib, platform=None, static=False): if lib == 'nvvm': return get_cuda_paths()['nvvm'].info or _dllnamepattern % 'nvvm' else: - libdir = get_cuda_paths()['cudalib_dir'].info + dir_type = 'static_cudalib_dir' if static else 'cudalib_dir' + libdir = get_cuda_paths()[dir_type].info candidates = find_lib(lib, libdir, platform=platform, static=static) namepattern = _staticnamepattern if static else _dllnamepattern @@ -69,13 +70,14 @@ def check_static_lib(lib): raise FileNotFoundError(f'{path} not found') -def _get_source_variable(lib): +def _get_source_variable(lib, static=False): if lib == 'nvvm': return get_cuda_paths()['nvvm'].by elif lib == 'libdevice': return get_cuda_paths()['libdevice'].by else: - return get_cuda_paths()['cudalib_dir'].by + dir_type = 'static_cudalib_dir' if static else 'cudalib_dir' + return get_cuda_paths()[dir_type].by def test(_platform=None, print_paths=True): @@ -118,7 +120,7 @@ def test(_platform=None, print_paths=True): # Check for cudadevrt (the only static library) lib = 'cudadevrt' path = get_cudalib(lib, _platform, static=True) - print('Finding {} from {}'.format(lib, _get_source_variable(lib))) + print('Finding {} from {}'.format(lib, _get_source_variable(lib, static=True))) if print_paths: print('\tlocated at', path) else: From 70eccb0bdd427834e7d04be2b27d87e69b848263 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 27 Jul 2021 13:08:01 +0100 Subject: [PATCH 07/45] Fix finding of libcudadevrt from CUDA_HOME on Windows --- numba/cuda/cuda_paths.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py index 3636480738b..bd290863196 100644 --- a/numba/cuda/cuda_paths.py +++ b/numba/cuda/cuda_paths.py @@ -78,6 +78,12 @@ def _cudalib_path(): else: return 'lib64' +def _static_cudalib_path(): + if IS_WIN32: + return ('lib', 'x64') + else: + return ('lib64',) + def _get_cudalib_dir_path_decision(): options = [ @@ -94,7 +100,7 @@ def _get_static_cudalib_dir_path_decision(): options = [ ('Conda environment', get_conda_ctk()), ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(_cudalib_path())), + ('CUDA_HOME', get_cuda_home(*_static_cudalib_path())), ('System', get_system_ctk(_cudalib_path())), ] by, libdir = _find_valid_path(options) From 14a3de62c3036a3144fc01d482e1cfc7381ec334 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 27 Jul 2021 15:57:35 +0100 Subject: [PATCH 08/45] Fix lib dirs for NVIDIA packages on Linux --- numba/cuda/cuda_paths.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py index bd290863196..528ce7904ae 100644 --- a/numba/cuda/cuda_paths.py +++ b/numba/cuda/cuda_paths.py @@ -78,6 +78,7 @@ def _cudalib_path(): else: return 'lib64' + def _static_cudalib_path(): if IS_WIN32: return ('lib', 'x64') @@ -177,7 +178,8 @@ def get_nvidia_cudalib_ctk(): if not nvvm_ctk: return env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return 
os.path.join(env_dir, _cudalib_path()) + subdir = 'bin' if IS_WIN32 else 'lib' + return os.path.join(env_dir, subdir) def get_nvidia_static_cudalib_ctk(): @@ -187,7 +189,8 @@ def get_nvidia_static_cudalib_ctk(): if not nvvm_ctk: return env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, 'Lib', 'x64') + dirs = ('Lib', 'x64') if IS_WIN32 else ('lib',) + return os.path.join(env_dir, *dirs) def get_cuda_home(*subdirs): From 0aeca128999a89efa4ac42cf68f16db43a2ec596 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 27 Jul 2021 16:29:35 +0100 Subject: [PATCH 09/45] Fix flake8 --- numba/cuda/cudadrv/libs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numba/cuda/cudadrv/libs.py b/numba/cuda/cudadrv/libs.py index 8c5b6f4feeb..ce2e26861b4 100644 --- a/numba/cuda/cudadrv/libs.py +++ b/numba/cuda/cudadrv/libs.py @@ -120,7 +120,8 @@ def test(_platform=None, print_paths=True): # Check for cudadevrt (the only static library) lib = 'cudadevrt' path = get_cudalib(lib, _platform, static=True) - print('Finding {} from {}'.format(lib, _get_source_variable(lib, static=True))) + print('Finding {} from {}'.format(lib, _get_source_variable(lib, + static=True))) if print_paths: print('\tlocated at', path) else: From 8e374525c2ab0dfd5249bc98b684a304031d4a7a Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Wed, 4 May 2022 16:38:22 -0400 Subject: [PATCH 10/45] Allow converting NumPy datetimes to int Add support for `int(numpy.datetime64)` and `int(numpy.timedelta64)`. --- numba/core/typing/builtins.py | 11 +++++++++-- numba/tests/test_npdatetime.py | 3 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/numba/core/typing/builtins.py b/numba/core/typing/builtins.py index eb883cb5f81..c1dcca827f2 100644 --- a/numba/core/typing/builtins.py +++ b/numba/core/typing/builtins.py @@ -796,7 +796,7 @@ class NumberClassAttribute(AttributeTemplate): def resolve___call__(self, classty): """ - Resolve a number class's constructor (e.g. calling int(...)) + Resolve a NumPy number class's constructor (e.g. calling numpy.int32(...)) """ ty = classty.instance_type @@ -841,7 +841,7 @@ class TypeRefAttribute(AttributeTemplate): def resolve___call__(self, classty): """ - Resolve a number class's constructor (e.g. calling int(...)) + Resolve a core number's constructor (e.g. calling int(...)) Note: @@ -960,6 +960,13 @@ def generic(self, args, kws): return signature(arg, arg) if isinstance(arg, (types.Float, types.Boolean)): return signature(types.intp, arg) + if isinstance(arg, types.NPDatetime): + if arg.unit == 'ns': + return signature(types.int64, arg) + else: + raise errors.NumbaTypeError(f"Only datetime64[ns] can be converted, but got {arg.unit}") + if isinstance(arg, types.NPTimedelta): + return signature(types.int64, arg) @infer_global(float) diff --git a/numba/tests/test_npdatetime.py b/numba/tests/test_npdatetime.py index 713fb6c6cca..294b455291b 100644 --- a/numba/tests/test_npdatetime.py +++ b/numba/tests/test_npdatetime.py @@ -85,6 +85,9 @@ def min_usecase(x, y): def max_usecase(x, y): return max(x, y) +def int_cast_usecase(x): + return int(x) + def make_add_constant(const): def add_constant(x): return x + const From 648f9be95fa5d34984a215b426bb178b6511ec43 Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Thu, 2 Jun 2022 14:09:24 -0400 Subject: [PATCH 11/45] Improve datetime unit conversion error As titled. 
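For illustration, the behaviour around this message looks roughly as
follows (a hypothetical snippet, not part of the change itself; the exact
unit shown in the message follows the argument's dtype):

```python
import numpy as np
from numba import njit

@njit
def to_int(dt):
    # int() over datetime64 is only typed for nanosecond units
    return int(dt)

# A datetime64[ns] value converts to its int64 representation:
to_int(np.datetime64('2014').astype('datetime64[ns]'))

# Any other unit is rejected at typing time with the improved message, e.g.
# "Only datetime64[ns] can be converted, but got datetime64[y]":
# to_int(np.datetime64('2014'))  # raises TypingError
```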
Co-authored-by: Siu Kwan Lam <1929845+sklam@users.noreply.github.com>
---
 numba/core/typing/builtins.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba/core/typing/builtins.py b/numba/core/typing/builtins.py
index c1dcca827f2..f31f18c8d66 100644
--- a/numba/core/typing/builtins.py
+++ b/numba/core/typing/builtins.py
@@ -964,7 +964,7 @@ def generic(self, args, kws):
             if arg.unit == 'ns':
                 return signature(types.int64, arg)
             else:
-                raise errors.NumbaTypeError(f"Only datetime64[ns] can be converted, but got {arg.unit}")
+                raise errors.NumbaTypeError(f"Only datetime64[ns] can be converted, but got datetime64[{arg.unit}]")
         if isinstance(arg, types.NPTimedelta):
             return signature(types.int64, arg)


From fe79ae6ca04266317591948909d29f505453b59e Mon Sep 17 00:00:00 2001
From: Andre Masella
Date: Thu, 2 Jun 2022 14:14:00 -0400
Subject: [PATCH 12/45] Check type conversion exception

Ensure the type conversion error includes the desired message
---
 numba/tests/test_npdatetime.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/numba/tests/test_npdatetime.py b/numba/tests/test_npdatetime.py
index 294b455291b..2c865dfef4d 100644
--- a/numba/tests/test_npdatetime.py
+++ b/numba/tests/test_npdatetime.py
@@ -576,6 +576,27 @@ class TestTimedeltaArithmeticNoPython(TestTimedeltaArithmetic):

     jitargs = dict(nopython=True)

+    def test_int_cast(self):
+        f = self.jit(int_cast_usecase)
+        def check(a):
+            self.assertPreciseEqual(f(a), int(a))
+
+        for (delta, unit) in ((3, 'ns'), (-4, 'ns'), (30000, 'ns'),
+                              (-40000000, 'ns'), (1, 'Y')):
+            check(TD(delta, unit).astype('timedelta64[ns]'))
+
+        for time in ('2014', '2016', '2000', '2014-02', '2014-03', '2014-04',
+                     '2016-02', '2000-12-31', '2014-01-16', '2014-01-05',
+                     '2014-01-07', '2014-01-06', '2014-02-02', '2014-02-27',
+                     '2014-02-16', '2014-03-01', '2000-01-01T01:02:03.002Z',
+                     '2000-01-01T01:02:03Z'):
+            check(DT(time).astype('datetime64[ns]'))
+
+        with self.assertRaises(TypingError, msg=('Only datetime64[ns] can be ' +
+                                                 'converted, but got ' +
+                                                 'datetime64[y]')):
+            f(DT('2014'))
+

 class TestDatetimeArithmetic(TestCase):

From 53a1805d8be2ddad5e8c1fe03265bdcd581b376f Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Tue, 8 Nov 2022 08:51:56 +0000
Subject: [PATCH 13/45] CUDA: Fix inspect_llvm()

Only the wrapper kernel IR was being returned by inspect_llvm() - it
should return the IR for all modules, joining them together in a similar
manner to how PTX code is joined for inspect_asm(). The underlying
implementation is in the CUDACodeLibrary, so it is the `get_llvm_str()`
method that we need to fix.
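To illustrate the intended behaviour, a sketch along the lines of the
updated tests (the kernel here is hypothetical; any kernel will do, and a
CUDA-capable environment is assumed):

```python
from numba import cuda, int32

sig = (int32[::1],)

@cuda.jit(sig)
def foo(x):
    x[0] = 1

llvm = foo.inspect_llvm(sig)
# With this fix, the joined IR contains the wrapper kernel as well as the
# wrapped device function, rather than only the first (wrapper) module:
assert 'cuda.kernel.wrapper' in llvm
```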
--- numba/cuda/codegen.py | 2 +- numba/cuda/tests/cudapy/test_inspect.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/numba/cuda/codegen.py b/numba/cuda/codegen.py index 5870f77266a..50af19e525a 100644 --- a/numba/cuda/codegen.py +++ b/numba/cuda/codegen.py @@ -103,7 +103,7 @@ def llvm_strs(self): return self._llvm_strs def get_llvm_str(self): - return self.llvm_strs[0] + return "\n\n".join(self.llvm_strs) def get_asm_str(self, cc=None): return self._join_ptxes(self._get_ptxes(cc=cc)) diff --git a/numba/cuda/tests/cudapy/test_inspect.py b/numba/cuda/tests/cudapy/test_inspect.py index 53f39753c55..b6be8e6a924 100644 --- a/numba/cuda/tests/cudapy/test_inspect.py +++ b/numba/cuda/tests/cudapy/test_inspect.py @@ -29,7 +29,14 @@ def foo(x, y): self.assertIn("(float32, int32)", typeanno) file.close() # Function name in LLVM - self.assertIn("foo", foo.inspect_llvm(sig)) + llvm = foo.inspect_llvm(sig) + self.assertIn("foo", llvm) + + # Kernel in LLVM + self.assertIn('cuda.kernel.wrapper', llvm) + + # Wrapped device function body in LLVM + self.assertIn("define linkonce_odr i32", llvm) asm = foo.inspect_asm(sig) @@ -64,6 +71,14 @@ def foo(x, y): self.assertIn("foo", llvmirs[intp, intp]) self.assertIn("foo", llvmirs[float64, float64]) + # Kernels in LLVM + self.assertIn('cuda.kernel.wrapper', llvmirs[intp, intp]) + self.assertIn('cuda.kernel.wrapper', llvmirs[float64, float64]) + + # Wrapped device function bodies in LLVM + self.assertIn("define linkonce_odr i32", llvmirs[intp, intp]) + self.assertIn("define linkonce_odr i32", llvmirs[float64, float64]) + asmdict = foo.inspect_asm() # Signature in assembly dict From 4e8d85177e1f7b87cd8278431c9ed29ea056df4e Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 8 Nov 2022 09:09:38 +0000 Subject: [PATCH 14/45] CUDA: Maintain error model when enabling lineinfo Enabling lineinfo switched the error model from NumPy to Python for CUDA kernels. This is not desired, and is an error, because lineinfo is used for profiling purposes - it should not affect the generated code, particularly when doing so has a negative impact on performance. --- numba/cuda/compiler.py | 12 ++++++++---- numba/cuda/tests/cudapy/test_lineinfo.py | 22 ++++++++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py index 6fe098a6fe4..87cd7be4ca8 100644 --- a/numba/cuda/compiler.py +++ b/numba/cuda/compiler.py @@ -190,12 +190,16 @@ def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False, flags.no_compile = True flags.no_cpython_wrapper = True flags.no_cfunc_wrapper = True + + # Both debug and lineinfo turn on debug information in the compiled code, + # but we keep them separate arguments in case we later want to overload + # some other behavior on the debug flag. In particular, -opt=3 is not + # supported with debug enabled, and enabling only lineinfo should not + # affect the error model. if debug or lineinfo: - # Note both debug and lineinfo turn on debug information in the - # compiled code, but we keep them separate arguments in case we - # later want to overload some other behavior on the debug flag. - # In particular, -opt=3 is not supported with -g. 
         flags.debuginfo = True
+
+    if debug:
         flags.error_model = 'python'
     else:
         flags.error_model = 'numpy'
+
     if inline:
         flags.forceinline = True
     if fastmath:
diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py
index 9ebdcc4845e..265cfefb5f8 100644
--- a/numba/cuda/tests/cudapy/test_lineinfo.py
+++ b/numba/cuda/tests/cudapy/test_lineinfo.py
@@ -1,6 +1,5 @@
 from numba.cuda.testing import skip_on_cudasim
-from numba import cuda
-from numba.core import types
+from numba import cuda, float32, int32
 from numba.cuda.testing import CUDATestCase
 import re
 import unittest
@@ -29,14 +28,29 @@ def test_no_lineinfo_in_asm(self):
         def foo(x):
             x[0] = 1

-        self._check(foo, sig=(types.int32[:],), expect=False)
+        self._check(foo, sig=(int32[:],), expect=False)

     def test_lineinfo_in_asm(self):
         @cuda.jit(lineinfo=True)
         def foo(x):
             x[0] = 1

-        self._check(foo, sig=(types.int32[:],), expect=True)
+        self._check(foo, sig=(int32[:],), expect=True)
+
+    def test_lineinfo_maintains_error_model(self):
+        sig = (float32[::1], float32[::1])
+
+        @cuda.jit(sig, lineinfo=True)
+        def divide_kernel(x, y):
+            x[0] /= y[0]
+
+        llvm = divide_kernel.inspect_llvm(sig)
+
+        # When the error model is Python, the device function returns 1 to
+        # signal an exception (e.g. divide by zero) has occurred. When the
+        # error model is the default NumPy one (as it should be when only
+        # lineinfo is enabled) the device function always returns 0.
+        self.assertNotIn('ret i32 1', llvm)


 if __name__ == '__main__':

From f325b5bd5972f25151b1288ab543bc9884dbfc98 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Tue, 8 Nov 2022 09:43:37 +0000
Subject: [PATCH 15/45] CUDA: Remove NVVM debug and lineinfo options

Since the move to NVVM 7.0, the debug and lineinfo flags are ignored and
only accepted for backwards compatibility - the decision to emit debug or
line info is made based on the presence of debug info in the IR.
---
 numba/cuda/codegen.py      | 33 ++-------------------------------
 numba/cuda/compiler.py     |  2 --
 numba/cuda/cudadrv/nvvm.py | 13 +++----------
 numba/cuda/dispatcher.py   |  3 ---
 4 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/numba/cuda/codegen.py b/numba/cuda/codegen.py
index 50af19e525a..070d145c203 100644
--- a/numba/cuda/codegen.py
+++ b/numba/cuda/codegen.py
@@ -1,9 +1,7 @@
 from llvmlite import ir
-from warnings import warn

 from numba.core import config, serialize
 from numba.core.codegen import Codegen, CodeLibrary
-from numba.core.errors import NumbaInvalidConfigWarning
 from .cudadrv import devices, driver, nvvm, runtime
 import ctypes

@@ -121,30 +119,10 @@ def _get_ptxes(self, cc=None):
         arch = nvvm.get_arch_option(*cc)
         options = self._nvvm_options.copy()
         options['arch'] = arch
-        if not nvvm.NVVM().is_nvvm70:
-            # Avoid enabling debug for NVVM 3.4 as it has various issues. We
-            # need to warn the user that we're doing this if any of the
-            # functions that they're compiling have `debug=True` set, which we
-            # can determine by checking the NVVM options.
- for lib in self.linking_libraries: - if lib._nvvm_options.get('debug'): - msg = ("debuginfo is not generated for CUDA versions " - f"< 11.2 (debug=True on function: {lib.name})") - warn(NumbaInvalidConfigWarning(msg)) - options['debug'] = False irs = self.llvm_strs - if options.get('debug', False): - # If we're compiling with debug, we need to compile modules with - # NVVM one at a time, because it does not support multiple modules - # with debug enabled: - # https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#source-level-debugging-support - ptxes = [nvvm.llvm_to_ptx(ir, **options) for ir in irs] - else: - # Otherwise, we compile all modules with NVVM at once because this - # results in better optimization than separate compilation. - ptxes = [nvvm.llvm_to_ptx(irs, **options)] + ptxes = [nvvm.llvm_to_ptx(irs, **options)] # Sometimes the result from NVVM contains trailing whitespace and # nulls, which we strip so that the assembly dump looks a little @@ -283,18 +261,11 @@ def finalize(self): # # See also discussion on PR #890: # https://github.com/numba/numba/pull/890 - # - # We don't adjust the linkage of functions when compiling for debug - - # because the device functions are in separate modules, we need them to - # be externally visible. for library in self._linking_libraries: for mod in library.modules: for fn in mod.functions: if not fn.is_declaration: - if self._nvvm_options.get('debug', False): - fn.linkage = 'weak_odr' - else: - fn.linkage = 'linkonce_odr' + fn.linkage = 'linkonce_odr' self._finalized = True diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py index 87cd7be4ca8..979f9ea1ef2 100644 --- a/numba/cuda/compiler.py +++ b/numba/cuda/compiler.py @@ -264,8 +264,6 @@ def compile_ptx(pyfunc, args, debug=False, lineinfo=False, device=False, warn(NumbaInvalidConfigWarning(msg)) nvvm_options = { - 'debug': debug, - 'lineinfo': lineinfo, 'fastmath': fastmath, 'opt': 3 if opt else 0 } diff --git a/numba/cuda/cudadrv/nvvm.py b/numba/cuda/cudadrv/nvvm.py index c65e0c777d2..fc7c72cf0ea 100644 --- a/numba/cuda/cudadrv/nvvm.py +++ b/numba/cuda/cudadrv/nvvm.py @@ -237,14 +237,13 @@ def compile(self, **options): The valid compiler options are - * - -g (enable generation of debugging information) * - -opt= * - 0 (disable optimizations) * - 3 (default, enable optimizations) * - -arch= - * - compute_20 (default) - * - compute_30 - * - compute_35 + * - compute_XX where XX is in (35, 37, 50, 52, 53, 60, 61, 62, 70, + * 72, 75, 80, 86, 89, 90). + * The default is compute_52. 
* - -ftz= * - 0 (default, preserve denormal values, when performing * single-precision floating-point operations) @@ -268,12 +267,6 @@ def compile(self, **options): # stringify options opts = [] - if 'debug' in options: - if options.pop('debug'): - opts.append('-g') - - if options.pop('lineinfo', False): - opts.append('-generate-line-info') if 'opt' in options: opts.append('-opt=%d' % options.pop('opt')) diff --git a/numba/cuda/dispatcher.py b/numba/cuda/dispatcher.py index 06b7b7494a4..f82b52d001e 100644 --- a/numba/cuda/dispatcher.py +++ b/numba/cuda/dispatcher.py @@ -76,8 +76,6 @@ def __init__(self, py_func, argtypes, link=None, debug=False, self.extensions = extensions or [] nvvm_options = { - 'debug': self.debug, - 'lineinfo': self.lineinfo, 'fastmath': fastmath, 'opt': 3 if opt else 0 } @@ -839,7 +837,6 @@ def compile_device(self, args): fastmath = self.targetoptions.get('fastmath') nvvm_options = { - 'debug': debug, 'opt': 3 if self.targetoptions.get('opt') else 0, 'fastmath': fastmath } From e448f30033e85d3b4245403d1b56dc9e4bab08ae Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 8 Nov 2022 11:02:17 +0000 Subject: [PATCH 16/45] Remove NvvmDIBuilder Debug info generation has been disabled / unsupported on NVVM 3.4 for a long time now - this removes the infrastructure that helped to generate it (NvvmDIBuilder). The warning about not generating debug info on NVVM 3.4 does become a little less specific now, in that it doesn't mention the function name responsible - however, the warning has been in place for a long time now and the next release of Numba is the last release that will support NVVM 3.4, so this doesn't seem like a large issue. Some tests were still testing some aspects of NVVM 3.4 debug generation - these are now skipped. --- numba/core/debuginfo.py | 154 ---------------------- numba/cuda/target.py | 6 +- numba/cuda/tests/cudapy/test_compiler.py | 6 + numba/cuda/tests/cudapy/test_debuginfo.py | 40 ++---- numba/cuda/tests/cudapy/test_lineinfo.py | 4 + 5 files changed, 29 insertions(+), 181 deletions(-) diff --git a/numba/core/debuginfo.py b/numba/core/debuginfo.py index 692241a35da..4e2ad17bcb9 100644 --- a/numba/core/debuginfo.py +++ b/numba/core/debuginfo.py @@ -453,157 +453,3 @@ def _di_location(self, line): 'column': 1, 'scope': self.subprograms[-1], }) - - -class NvvmDIBuilder(DIBuilder): - """ - Only implemented the minimal metadata to get line number information. 
- See http://llvm.org/releases/3.4/docs/LangRef.html - """ - # These constants are copied from llvm3.4 - DW_LANG_Python = 0x0014 - DI_Compile_unit = 786449 - DI_Subroutine_type = 786453 - DI_Subprogram = 786478 - DI_File = 786473 - - DWARF_VERSION = None # don't emit DWARF version - DEBUG_INFO_VERSION = 1 # as required by NVVM IR Spec - # Rename DIComputeUnit MD to hide it from llvm.parse_assembly() - # which strips invalid/outdated debug metadata - DBG_CU_NAME = 'numba.llvm.dbg.cu' - - # Default member - # Used in mark_location to remember last lineno to avoid duplication - _last_lineno = None - - def mark_variable(self, builder, allocavalue, name, lltype, size, line, - datamodel=None, argidx=None): - # unsupported - pass - - def mark_location(self, builder, line): - # Avoid duplication - if self._last_lineno == line: - return - self._last_lineno = line - # Add call to an inline asm to mark line location - asmty = ir.FunctionType(ir.VoidType(), []) - asm = ir.InlineAsm(asmty, "// dbg {}".format(line), "", - side_effect=True) - call = builder.call(asm, []) - md = self._di_location(line) - call.set_metadata('numba.dbg', md) - - def mark_subprogram(self, function, qualname, argnames, argtypes, line): - argmap = dict(zip(argnames, argtypes)) - self._add_subprogram(name=qualname, linkagename=function.name, - line=line) - - def _add_subprogram(self, name, linkagename, line): - """Emit subprogram metadata - """ - subp = self._di_subprogram(name, linkagename, line) - self.subprograms.append(subp) - return subp - - # - # Helper methods to create the metadata nodes. - # - - def _filepair(self): - return self.module.add_metadata([ - os.path.basename(self.filepath), - os.path.dirname(self.filepath), - ]) - - def _di_file(self): - return self.module.add_metadata([ - self._const_int(self.DI_File), - self._filepair(), - ]) - - def _di_compile_unit(self): - filepair = self._filepair() - empty = self.module.add_metadata([self._const_int(0)]) - sp_metadata = self.module.add_metadata(self.subprograms) - return self.module.add_metadata([ - self._const_int(self.DI_Compile_unit), # tag - filepair, # source directory and file pair - self._const_int(self.DW_LANG_Python), # language - 'Numba', # producer - self._const_bool(True), # optimized - "", # flags?? 
- self._const_int(0), # runtime version - empty, # enums types - empty, # retained types - self.module.add_metadata(self.subprograms), # subprograms - empty, # global variables - empty, # imported entities - "", # split debug filename - ]) - - def _di_subroutine_type(self): - types = self.module.add_metadata([None]) - return self.module.add_metadata([ - self._const_int(self.DI_Subroutine_type), # tag - self._const_int(0), - None, - "", - self._const_int(0), # line of definition - self._const_int(0, 64), # size in bits - self._const_int(0, 64), # offset in bits - self._const_int(0, 64), # align in bits - self._const_int(0), # flags - None, - types, - self._const_int(0), - None, - None, - None, - ]) - - def _di_subprogram(self, name, linkagename, line): - function_ptr = self.module.get_global(linkagename) - subroutine_type = self._di_subroutine_type() - funcvars = self.module.add_metadata([self._const_int(0)]) - context = self._di_file() - return self.module.add_metadata([ - self._const_int(self.DI_Subprogram), # tag - self._filepair(), # source dir & file - context, # context descriptor - name, # name - name, # display name - linkagename, # linkage name - self._const_int(line), # line - subroutine_type, # type descriptor - self._const_bool(False), # is local - self._const_bool(True), # is definition - self._const_int(0), # virtuality - self._const_int(0), # virtual function index - None, # vtable base type - self._const_int(0), # flags - self._const_bool(True), # is optimized - function_ptr, # pointer to function - None, # function template parameters - None, # function declaration descriptor - funcvars, # function variables - self._const_int(line) # scope line - ]) - - def _di_location(self, line): - return self.module.add_metadata([ - self._const_int(line), # line - self._const_int(0), # column - self.subprograms[-1], # scope - None, # original scope - ]) - - def initialize(self): - pass - - def finalize(self): - # We create the compile unit at this point because subprograms is - # populated and can be referred to by the compile unit. 
- self.dicompileunit = self._di_compile_unit() - super().finalize() diff --git a/numba/cuda/target.py b/numba/cuda/target.py index 6816cc4e186..f794cb98a0e 100644 --- a/numba/cuda/target.py +++ b/numba/cuda/target.py @@ -4,6 +4,7 @@ from numba.core import typing, types, debuginfo, itanium_mangler, cgutils from numba.core.dispatcher import Dispatcher +from numba.core.errors import NumbaInvalidConfigWarning from numba.core.utils import cached_property from numba.core.base import BaseContext from numba.core.callconv import MinimalCallConv @@ -12,6 +13,7 @@ from .cudadrv import nvvm from numba.cuda import codegen, nvvmutils +from warnings import warn # ----------------------------------------------------------------------------- # Typing @@ -73,7 +75,9 @@ def DIBuilder(self): if nvvm.NVVM().is_nvvm70: return debuginfo.DIBuilder else: - return debuginfo.NvvmDIBuilder + msg = "debuginfo is not generated for CUDA toolkits < 11.2" + warn(NumbaInvalidConfigWarning(msg)) + return debuginfo.DummyDIBuilder @property def enable_boundscheck(self): diff --git a/numba/cuda/tests/cudapy/test_compiler.py b/numba/cuda/tests/cudapy/test_compiler.py index 94561511bc6..bef1fb14ae9 100644 --- a/numba/cuda/tests/cudapy/test_compiler.py +++ b/numba/cuda/tests/cudapy/test_compiler.py @@ -103,6 +103,9 @@ def check_line_info(self, ptx): self.assertRegex(ptx, '\\.file.*test_compiler.py"') def test_device_function_with_line_info(self): + if not NVVM().is_nvvm70: + self.skipTest('lineinfo not generated for NVVM 3.4') + def f(): pass @@ -110,6 +113,9 @@ def f(): self.check_line_info(ptx) def test_kernel_with_line_info(self): + if not NVVM().is_nvvm70: + self.skipTest('lineinfo not generated for NVVM 3.4') + def f(): pass diff --git a/numba/cuda/tests/cudapy/test_debuginfo.py b/numba/cuda/tests/cudapy/test_debuginfo.py index 4552695ec7e..06e53ea5a2d 100644 --- a/numba/cuda/tests/cudapy/test_debuginfo.py +++ b/numba/cuda/tests/cudapy/test_debuginfo.py @@ -69,6 +69,9 @@ def f(x): x[0] = 0 def test_wrapper_has_debuginfo(self): + if not NVVM().is_nvvm70: + self.skipTest("debuginfo not generated for NVVM 3.4") + sig = (types.int32[::1],) @cuda.jit(sig, debug=True, opt=0) @@ -77,30 +80,14 @@ def f(x): llvm_ir = f.inspect_llvm(sig) - if NVVM().is_nvvm70: - # NNVM 7.0 IR attaches a debug metadata reference to the - # definition - defines = [line for line in llvm_ir.splitlines() - if 'define void @"_ZN6cudapy' in line] - - # Make sure we only found one definition - self.assertEqual(len(defines), 1) - - wrapper_define = defines[0] - self.assertIn('!dbg', wrapper_define) - else: - # NVVM 3.4 subprogram debuginfo refers to the definition. - # '786478' is a constant referring to a subprogram. - disubprograms = [line for line in llvm_ir.splitlines() - if '786478' in line] + defines = [line for line in llvm_ir.splitlines() + if 'define void @"_ZN6cudapy' in line] - # Make sure we only found one subprogram - self.assertEqual(len(disubprograms), 1) + # Make sure we only found one definition + self.assertEqual(len(defines), 1) - wrapper_disubprogram = disubprograms[0] - # Check that the subprogram points to a wrapper (these are all in - # the "cudapy::" namespace). 
- self.assertIn('_ZN6cudapy', wrapper_disubprogram) + wrapper_define = defines[0] + self.assertIn('!dbg', wrapper_define) def test_debug_function_calls_internal_impl(self): # Calling a function in a module generated from an implementation @@ -199,12 +186,13 @@ def test_chained_device_function_two_calls(self): f1_debug, f2_debug) - def check_warnings(self, warnings, warn_count): + def check_warnings(self, warnings, warning_expected): if NVVM().is_nvvm70: # We should not warn on NVVM 7.0. self.assertEqual(len(warnings), 0) else: - self.assertEqual(len(warnings), warn_count) + if warning_expected: + self.assertGreater(len(warnings), 0) # Each warning should warn about not generating debug info. for warning in warnings: self.assertIs(warning.category, NumbaInvalidConfigWarning) @@ -225,8 +213,8 @@ def test_debug_warning(self): self._test_chained_device_function_two_calls(kernel_debug, f1_debug, f2_debug) - warn_count = kernel_debug + f1_debug + f2_debug - self.check_warnings(w, warn_count) + warning_expected = kernel_debug or f1_debug or f2_debug + self.check_warnings(w, warning_expected) def test_chained_device_three_functions(self): # Like test_chained_device_function, but with enough functions (three) diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py index 265cfefb5f8..598fc66468f 100644 --- a/numba/cuda/tests/cudapy/test_lineinfo.py +++ b/numba/cuda/tests/cudapy/test_lineinfo.py @@ -1,6 +1,7 @@ from numba.cuda.testing import skip_on_cudasim from numba import cuda, float32, int32 from numba.cuda.testing import CUDATestCase +from numba.cuda.cudadrv.nvvm import NVVM import re import unittest @@ -31,6 +32,9 @@ def foo(x): self._check(foo, sig=(int32[:],), expect=False) def test_lineinfo_in_asm(self): + if not NVVM().is_nvvm70: + self.skipTest("debuginfo not generated for NVVM 3.4") + @cuda.jit(lineinfo=True) def foo(x): x[0] = 1 From 9d1b5898ed77f5fe9f0c29859cb5bda548858fb0 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 8 Nov 2022 15:53:00 +0000 Subject: [PATCH 17/45] Fix CUDA lineinfo issues Line info emission suffered from two problems: 1. It was missing for device functions. 2. Asking Numba to generate lineinfo for kernels resulted in full debug info and deoptimizations (many redundant moves in the generated SASS, for example) being turned on. The first issue was probably introduced by a device function / dispatcher refactor, that resulted in the lineinfo flag getting lost, and has a simple fix: we ensure that `lineinfo` is passed to `compile_cuda()` as appropriate in `Dispatcher.compile_device()`. This regression (or perhaps an issue from the beginning of lineinfo support) was able to exist because there was no test of lineinfo in device functions, so extra testing is added to ensure it remains correct. The second issue was caused by the `emissionKind` of the debug info. As well as the decision to emit debug info being made based on its presence or absence in the LLVM IR, the choice between full debug info (and deoptimization) or just lineinfo directives is also controlled by what appears in the IR. When the `emissionKind` is `FullDebug`, then full debug info is emitted and deoptimized code is generated. To generate lineinfo only, the `emissionKind` needs to be `DebugDirectivesOnly` for PTX generation (this is common between both NVVM and the upstream NVPTX target). The change in LLVM that introduced this is https://reviews.llvm.org/D46021. 
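Concretely, a lineinfo-only compilation now requests directives-only
emission, which can be observed in the IR (a sketch mirroring the new
tests; the kernel is hypothetical and a CUDA-capable environment is
assumed):

```python
import re
from numba import cuda, int32

@cuda.jit(lineinfo=True)
def foo(x):
    x[0] = 1

sig = (int32[::1],)
foo.compile(sig)
llvm = foo.inspect_llvm(sig)

# The DICompileUnit should request directives-only emission rather than
# FullDebug, so only .file/.loc directives appear in the PTX:
assert re.search(r'emissionKind:\s*DebugDirectivesOnly', llvm)
```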
To implement control of the `emissionKind`, the following changes are made:

- Changing the `emissionKind` to be a `DIToken`. This is not strictly
  necessary right now, as we could still emit integer values (e.g. 1 for
  `FullDebug` or 3 for `DebugDirectivesOnly`), but integer values make it
  harder for a casual reader of the emitted IR to infer what is going on.
- Addition of a new flag `dbg_directives_only` to control whether only the
  `.loc` and `.file` directives are emitted, as opposed to full debug info.
  This flag is only enabled by the CUDA target when `lineinfo` is `True`.
- Only disabling SROA-like passes when debuginfo is enabled, but not when
  debug directives only are to be emitted - this aims towards keeping the
  code generation unmodified when generating line info only.
- Various changes needed to propagate the `dbg_directives_only` information
  into the right places as appropriate.

Testing for lineinfo in the CUDA target was a bit weak, so it is augmented
with this commit to ensure it stays working. The additions check both the
LLVM IR metadata and PTX, and also check the debug info for both kernels
and device functions.

Fixes #8581 - "CUDA: Lineinfo doesn't work for device functions"
Fixes #8582 - "CUDA: Enabling lineinfo causes many redundant moves in SASS"
---
 numba/core/compiler.py                   |   7 +
 numba/core/debuginfo.py                  |  12 +-
 numba/core/lowering.py                   |   9 +-
 numba/cuda/compiler.py                   |   7 +-
 numba/cuda/dispatcher.py                 |   6 +-
 numba/cuda/target.py                     |  19 +--
 numba/cuda/tests/cudapy/test_lineinfo.py | 171 +++++++++++++++++++++--
 7 files changed, 203 insertions(+), 28 deletions(-)

diff --git a/numba/core/compiler.py b/numba/core/compiler.py
index b699937ca86..e8f0926d188 100644
--- a/numba/core/compiler.py
+++ b/numba/core/compiler.py
@@ -158,6 +158,13 @@ class Flags(TargetConfig):
              "Equivalent to adding optnone attribute in the LLVM Function.")
     )

+    dbg_directives_only = Option(
+        type=bool,
+        default=False,
+        doc=("Make debug emissions directives-only. 
" + "Used when generating lineinfo.") + ) + DEFAULT_FLAGS = Flags() DEFAULT_FLAGS.nrt = True diff --git a/numba/core/debuginfo.py b/numba/core/debuginfo.py index 4e2ad17bcb9..53a7927f03c 100644 --- a/numba/core/debuginfo.py +++ b/numba/core/debuginfo.py @@ -60,7 +60,7 @@ def finalize(self): class DummyDIBuilder(AbstractDIBuilder): - def __init__(self, module, filepath, cgctx): + def __init__(self, module, filepath, cgctx, directives_only): pass def mark_variable(self, builder, allocavalue, name, lltype, size, line, @@ -89,12 +89,18 @@ class DIBuilder(AbstractDIBuilder): DBG_CU_NAME = 'llvm.dbg.cu' _DEBUG = False - def __init__(self, module, filepath, cgctx): + def __init__(self, module, filepath, cgctx, directives_only): self.module = module self.filepath = os.path.abspath(filepath) self.difile = self._di_file() self.subprograms = [] self.cgctx = cgctx + + if directives_only: + self.emission_kind = 'DebugDirectivesOnly' + else: + self.emission_kind = 'FullDebug' + self.initialize() def initialize(self): @@ -404,7 +410,7 @@ def _di_compile_unit(self): 'producer': 'clang (Numba)', 'runtimeVersion': 0, 'isOptimized': config.OPT != 0, - 'emissionKind': 1, # 0-NoDebug, 1-FullDebug + 'emissionKind': ir.DIToken(self.emission_kind), }, is_distinct=True) def _di_subroutine_type(self, line, function, argmap): diff --git a/numba/core/lowering.py b/numba/core/lowering.py index 84ce3514cfe..f62a49d58c5 100644 --- a/numba/core/lowering.py +++ b/numba/core/lowering.py @@ -64,9 +64,11 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): # debuginfo def location self.defn_loc = self._compute_def_location() + directives_only = self.flags.dbg_directives_only self.debuginfo = dibuildercls(module=self.module, filepath=func_ir.loc.filename, - cgctx=context) + cgctx=context, + directives_only=directives_only) # Subclass initialization self.init() @@ -344,7 +346,10 @@ def _disable_sroa_like_opt(self): prevent alloca and subsequent load/store for locals) should be disabled. 
Currently, this is conditional solely on the presence of a request for the emission of debug information.""" - return False if self.flags is None else self.flags.debuginfo + if self.flags is None: + return False + + return self.flags.debuginfo and not self.flags.dbg_directives_only def _find_singly_assigned_variable(self): func_ir = self.func_ir diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py index 979f9ea1ef2..5dd2d16ba80 100644 --- a/numba/cuda/compiler.py +++ b/numba/cuda/compiler.py @@ -199,10 +199,14 @@ def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False, if debug or lineinfo: flags.debuginfo = True + if lineinfo: + flags.dbg_directives_only = True + if debug: flags.error_model = 'python' else: flags.error_model = 'numpy' + if inline: flags.forceinline = True if fastmath: @@ -281,7 +285,8 @@ def compile_ptx(pyfunc, args, debug=False, lineinfo=False, device=False, linenum = code.co_firstlineno lib, kernel = tgt.prepare_cuda_kernel(cres.library, cres.fndesc, debug, - nvvm_options, filename, linenum) + lineinfo, nvvm_options, filename, + linenum) cc = cc or config.CUDA_DEFAULT_PTX_CC ptx = lib.get_asm_str(cc=cc) diff --git a/numba/cuda/dispatcher.py b/numba/cuda/dispatcher.py index f82b52d001e..b438c805265 100644 --- a/numba/cuda/dispatcher.py +++ b/numba/cuda/dispatcher.py @@ -82,7 +82,7 @@ def __init__(self, py_func, argtypes, link=None, debug=False, cres = compile_cuda(self.py_func, types.void, self.argtypes, debug=self.debug, - lineinfo=self.lineinfo, + lineinfo=lineinfo, inline=inline, fastmath=fastmath, nvvm_options=nvvm_options) @@ -91,7 +91,7 @@ def __init__(self, py_func, argtypes, link=None, debug=False, filename = code.co_filename linenum = code.co_firstlineno lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc, - debug, nvvm_options, + debug, lineinfo, nvvm_options, filename, linenum, max_registers) @@ -833,6 +833,7 @@ def compile_device(self, args): with self._compiling_counter: debug = self.targetoptions.get('debug') + lineinfo = self.targetoptions.get('lineinfo') inline = self.targetoptions.get('inline') fastmath = self.targetoptions.get('fastmath') @@ -843,6 +844,7 @@ def compile_device(self, args): cres = compile_cuda(self.py_func, None, args, debug=debug, + lineinfo=lineinfo, inline=inline, fastmath=fastmath, nvvm_options=nvvm_options) diff --git a/numba/cuda/target.py b/numba/cuda/target.py index f794cb98a0e..0d0e11ca398 100644 --- a/numba/cuda/target.py +++ b/numba/cuda/target.py @@ -143,7 +143,7 @@ def mangler(self, name, argtypes, *, abi_tags=(), uid=None): return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) - def prepare_cuda_kernel(self, codelib, fndesc, debug, + def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo, nvvm_options, filename, linenum, max_registers=None): """ @@ -174,11 +174,12 @@ def prepare_cuda_kernel(self, codelib, fndesc, debug, max_registers=max_registers) library.add_linking_library(codelib) wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name, - debug, filename, linenum) + debug, lineinfo, filename, + linenum) return library, wrapper def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug, - filename, linenum): + lineinfo, filename, linenum): """ Generate the kernel wrapper in the given ``library``. The function being wrapped is described by ``fndesc``. 
@@ -199,10 +200,12 @@ def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
         wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
         builder = ir.IRBuilder(wrapfn.append_basic_block(''))

-        if debug:
-            debuginfo = self.DIBuilder(
-                module=wrapper_module, filepath=filename, cgctx=self,
-            )
+        if debug or lineinfo:
+            directives_only = lineinfo and not debug
+            debuginfo = self.DIBuilder(module=wrapper_module,
+                                       filepath=filename,
+                                       cgctx=self,
+                                       directives_only=directives_only)
             debuginfo.mark_subprogram(
                 wrapfn, kernel_name, fndesc.args, argtypes, linenum,
             )
@@ -267,7 +270,7 @@ def define_error_gv(postfix):
         nvvm.set_cuda_kernel(wrapfn)
         library.add_ir_module(wrapper_module)
-        if debug:
+        if debug or lineinfo:
             debuginfo.finalize()
         library.finalize()
         wrapfn = library.get_function(wrapfn.name)
diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py
index 598fc66468f..2dae2b920dc 100644
--- a/numba/cuda/tests/cudapy/test_lineinfo.py
+++ b/numba/cuda/tests/cudapy/test_lineinfo.py
@@ -8,21 +8,67 @@
 @skip_on_cudasim('Simulator does not produce lineinfo')
 class TestCudaLineInfo(CUDATestCase):
-    """
-    These tests only check the compiled PTX for line mappings
-    """
-    def _getasm(self, fn, sig):
-        fn.compile(sig)
-        return fn.inspect_asm(sig)
+    def _loc_directive_regex(self):
+        # This is used in several tests
+
+        pat = (
+            r'\.loc'      # .loc directive beginning
+            r'\s*[0-9]*'  # whitespace then file index
+            r'\s*[0-9]*'  # whitespace then line number
+            r'\s*[0-9]*'  # whitespace then column position
+        )
+        return re.compile(pat)

     def _check(self, fn, sig, expect):
-        asm = self._getasm(fn, sig=sig)
+        fn.compile(sig)
+        llvm = fn.inspect_llvm(sig)
+        ptx = fn.inspect_asm(sig)
+        assertfn = self.assertIsNotNone if expect else self.assertIsNone
+
+        # DICompileUnit debug info metadata should all be of the
+        # DebugDirectivesOnly kind, and not the FullDebug kind
+        pat = (
+            r'!DICompileUnit\(.*'   # Opening of DICompileUnit metadata. Since
+                                    # the order of attributes is not
+                                    # guaranteed, we need to match arbitrarily
+                                    # afterwards.
+            r'emissionKind:\s*'     # The emissionKind attribute followed by
+                                    # whitespace.
+            r'DebugDirectivesOnly'  # The correct emissionKind.
+        )
+        match = re.compile(pat).search(llvm)
+        assertfn(match, msg=ptx)
+
+        pat = (
+            r'!DICompileUnit\(.*'  # Same as the pattern above, but for the
+            r'emissionKind:\s*'    # incorrect FullDebug emissionKind.
+            r'FullDebug'           #
+        )
+        match = re.compile(pat).search(llvm)
+        self.assertIsNone(match, msg=ptx)
+
         # The name of this file should be present in the line mapping
         # if lineinfo was propagated through correctly.
- re_section_lineinfo = re.compile(r"test_lineinfo.py") - match = re_section_lineinfo.search(asm) - assertfn = self.assertIsNotNone if expect else self.assertIsNone - assertfn(match, msg=asm) + pat = ( + r'\.file' # .file directive beginning + r'\s*[0-9]*\s*' # file number surrounded by whitespace + r'".*test_lineinfo.py"' # filename in quotes, ignoring full path + ) + match = re.compile(pat).search(ptx) + assertfn(match, msg=ptx) + + # .loc directives should be present in the ptx + self._loc_directive_regex().search(ptx) + assertfn(match, msg=ptx) + + # Debug info sections should not be present when only lineinfo is + # generated + pat = ( + r'\.section\s*' # .section directive beginning + r'\.debug_' # Any section name beginning ".debug_" + ) + match = re.compile(pat).search(ptx) + self.assertIsNone(match, msg=ptx) def test_no_lineinfo_in_asm(self): @cuda.jit(lineinfo=False) @@ -33,7 +79,7 @@ def foo(x): def test_lineinfo_in_asm(self): if not NVVM().is_nvvm70: - self.skipTest("debuginfo not generated for NVVM 3.4") + self.skipTest("lineinfo not generated for NVVM 3.4") @cuda.jit(lineinfo=True) def foo(x): @@ -56,6 +102,107 @@ def divide_kernel(x, y): # lineinfo is enabled) the device function always returns 0. self.assertNotIn('ret i32 1', llvm) + def test_no_lineinfo_in_device_function(self): + # Ensure that no lineinfo is generated in device functions by default. + @cuda.jit + def callee(x): + x[0] += 1 + + @cuda.jit + def caller(x): + x[0] = 1 + callee(x) + + sig = (int32[:],) + self._check(caller, sig=sig, expect=False) + + def test_lineinfo_in_device_function(self): + if not NVVM().is_nvvm70: + self.skipTest("lineinfo not generated for NVVM 3.4") + + # First we define a device function / kernel pair and run the usual + # checks on the generated LLVM and PTX. + + @cuda.jit(lineinfo=True) + def callee(x): + x[0] += 1 + + @cuda.jit(lineinfo=True) + def caller(x): + x[0] = 1 + callee(x) + + sig = (int32[:],) + self._check(caller, sig=sig, expect=True) + + # Now we can check the PTX of the device function specifically. + + ptx = caller.inspect_asm(sig) + ptxlines = ptx.splitlines() + + # To check the device function, we need to identify its boundaries. + + # A line beginning with ".weak .func" + devfn_start = re.compile(r'^\.weak\s*\.func') + + # Identify the beginning of the function. + start = None + + for lineno, line in enumerate(ptxlines): + if devfn_start.match(line) is not None: + # We will begin our search on the line following the + # declaration + start = lineno + 1 + + if start is None: + self.fail(f'Could not identify device function in:\n\n{ptx}') + + # Identify the end of the function + end = None + + for offset, line in enumerate(ptxlines[start:]): + # Assume the end of the function is a line with an unindented '}' + if line[:1] == '}': + end = start + offset + + if end is None: + self.fail(f'Could not identify end of device function in:\n\n{ptx}') + + # Scan for .loc directives in the device function. + loc_directive = self._loc_directive_regex() + found = False + + for line in ptxlines[start:end]: + if loc_directive.search(line) is not None: + found = True + + if not found: + # Join one line either side so the function as a whole is shown, + # i.e. including the declaration and parameter list, and the + # closing brace. + devfn = "\n".join(ptxlines[start - 1:end + 1]) + self.fail(f'.loc directive not found in:\n\n{devfn}') + + # We also inspect the LLVM to ensure that there's debug info for each + # subprogram (function). 
A lightweight way to check this is to ensure + # that we have as many DISubprograms as we expect. + + llvm = caller.inspect_llvm(sig) + subprograms = 0 + for line in llvm.splitlines(): + if 'distinct !DISubprogram' in line: + subprograms += 1 + + # One DISubprogram for each of: + # - The kernel wrapper + # - The caller + # - The callee + expected_subprograms = 3 + + self.assertEqual(subprograms, expected_subprograms, + f'"Expected {expected_subprograms} DISubprograms; ' + f'got {subprograms}') + if __name__ == '__main__': unittest.main() From a9ffa8c3f151c3286f5793f72b0b7c25f24b9c5c Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 18 Jan 2023 16:58:12 +0000 Subject: [PATCH 18/45] CUDA: Warn when requesting debug and lineinfo Also we add lineinfo as an explicit kwarg to the `@cuda.jit` decorator. --- numba/cuda/decorators.py | 12 ++++++++++-- numba/cuda/tests/cudapy/test_lineinfo.py | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/numba/cuda/decorators.py b/numba/cuda/decorators.py index a755ea0c82e..41348c7c98d 100644 --- a/numba/cuda/decorators.py +++ b/numba/cuda/decorators.py @@ -12,7 +12,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None, - opt=True, cache=False, **kws): + opt=True, lineinfo=False, cache=False, **kws): """ JIT compile a Python function for CUDA GPUs. @@ -80,6 +80,12 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None, " - set debug=False or opt=False.") warn(NumbaInvalidConfigWarning(msg)) + if debug and lineinfo: + msg = ("debug and lineinfo are mutually exclusive. Use debug to get " + "full debug info (this disables some optimizations), or " + "lineinfo for line info only with code generation unaffected.") + warn(NumbaInvalidConfigWarning(msg)) + if device and kws.get('link'): raise ValueError("link keyword invalid for device function") @@ -101,6 +107,7 @@ def jitwrapper(func): def _jit(func): targetoptions = kws.copy() targetoptions['debug'] = debug + targetoptions['lineinfo'] = lineinfo targetoptions['link'] = link targetoptions['opt'] = opt targetoptions['fastmath'] = fastmath @@ -140,7 +147,7 @@ def autojitwrapper(func): else: def autojitwrapper(func): return jit(func, device=device, debug=debug, opt=opt, - link=link, cache=cache, **kws) + lineinfo=lineinfo, link=link, cache=cache, **kws) return autojitwrapper # func_or_sig is a function @@ -151,6 +158,7 @@ def autojitwrapper(func): else: targetoptions = kws.copy() targetoptions['debug'] = debug + targetoptions['lineinfo'] = lineinfo targetoptions['opt'] = opt targetoptions['link'] = link targetoptions['fastmath'] = fastmath diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py index 2dae2b920dc..ee23daad789 100644 --- a/numba/cuda/tests/cudapy/test_lineinfo.py +++ b/numba/cuda/tests/cudapy/test_lineinfo.py @@ -1,9 +1,10 @@ -from numba.cuda.testing import skip_on_cudasim from numba import cuda, float32, int32 -from numba.cuda.testing import CUDATestCase +from numba.core.errors import NumbaInvalidConfigWarning +from numba.cuda.testing import CUDATestCase, skip_on_cudasim from numba.cuda.cudadrv.nvvm import NVVM import re import unittest +import warnings @skip_on_cudasim('Simulator does not produce lineinfo') @@ -203,6 +204,19 @@ def caller(x): f'"Expected {expected_subprograms} DISubprograms; ' f'got {subprograms}') + def test_debug_and_lineinfo_warning(self): + with warnings.catch_warnings(record=True) as w: + # We pass opt=False to prevent the warning about opt and debug + 
# occurring as well + @cuda.jit(debug=True, lineinfo=True, opt=False) + def f(): + pass + + self.assertEqual(len(w), 1) + self.assertEqual(w[0].category, NumbaInvalidConfigWarning) + self.assertIn('debug and lineinfo are mutually exclusive', + str(w[0].message)) + if __name__ == '__main__': unittest.main() From 08e5e79278f13e5147f9a7f458508e3b555a0c1a Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 18 Jan 2023 17:05:11 +0000 Subject: [PATCH 19/45] Update docstring for prepare_cuda_kernel to match args --- numba/cuda/target.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numba/cuda/target.py b/numba/cuda/target.py index 015a997f3f6..77470320e9b 100644 --- a/numba/cuda/target.py +++ b/numba/cuda/target.py @@ -165,6 +165,7 @@ def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo, in a kernel call. fndesc: The FunctionDescriptor of the source function. debug: Whether to compile with debug. + lineinfo: Whether to emit line info. nvvm_options: Dict of NVVM options used when compiling the new library. filename: The source filename that the function is contained in. linenum: The source line that the function is on. From 3825fdc3d2cf075cfba9cce0ffd068ebaa84dff2 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 18 Jan 2023 17:09:07 +0000 Subject: [PATCH 20/45] Enforce whitespace checks in test_lineinfo a bit more aggressively --- numba/cuda/tests/cudapy/test_lineinfo.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py index ee23daad789..bc53fbf6a0f 100644 --- a/numba/cuda/tests/cudapy/test_lineinfo.py +++ b/numba/cuda/tests/cudapy/test_lineinfo.py @@ -14,9 +14,9 @@ def _loc_directive_regex(self): pat = ( r'\.loc' # .loc directive beginning - r'\s*[0-9]*' # whitespace then file index - r'\s*[0-9]*' # whitespace then line number - r'\s*[0-9]*' # whitespace then column position + r'\s+[0-9]+' # whitespace then file index + r'\s+[0-9]+' # whitespace then line number + r'\s+[0-9]+' # whitespace then column position ) return re.compile(pat) @@ -32,8 +32,8 @@ def _check(self, fn, sig, expect): r'!DICompileUnit\(.*' # Opening of DICompileUnit metadata. Since # the order of attributes is not # guaranteed, we need to match arbitrarily - # aftwerwards. - r'emissionKind:\s*' # The emissionKind attribute followed by + # afterwards. + r'emissionKind:\s+' # The emissionKind attribute followed by # whitespace. r'DebugDirectivesOnly' # The correct emissionKind. ) @@ -42,7 +42,7 @@ def _check(self, fn, sig, expect): pat = ( r'!DICompileUnit\(.*' # Same as the pattern above, but for the - r'emissionKind:\s*' # incorrect FullDebug emissionKind. + r'emissionKind:\s+' # incorrect FullDebug emissionKind. r'FullDebug' # ) match = re.compile(pat).search(llvm) @@ -52,7 +52,7 @@ def _check(self, fn, sig, expect): # if lineinfo was propagated through correctly. 
pat = ( r'\.file' # .file directive beginning - r'\s*[0-9]*\s*' # file number surrounded by whitespace + r'\s+[0-9]+\s+' # file number surrounded by whitespace r'".*test_lineinfo.py"' # filename in quotes, ignoring full path ) match = re.compile(pat).search(ptx) @@ -65,7 +65,7 @@ def _check(self, fn, sig, expect): # Debug info sections should not be present when only lineinfo is # generated pat = ( - r'\.section\s*' # .section directive beginning + r'\.section\s+' # .section directive beginning r'\.debug_' # Any section name beginning ".debug_" ) match = re.compile(pat).search(ptx) From b75097e05498ba1027a8182559b039cfa3597dfd Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 18 Jan 2023 17:14:07 +0000 Subject: [PATCH 21/45] CUDA: break in search loops in test_lineinfo when a match is found --- numba/cuda/tests/cudapy/test_lineinfo.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py index bc53fbf6a0f..de3dc823357 100644 --- a/numba/cuda/tests/cudapy/test_lineinfo.py +++ b/numba/cuda/tests/cudapy/test_lineinfo.py @@ -154,6 +154,7 @@ def caller(x): # We will begin our search on the line following the # declaration start = lineno + 1 + break if start is None: self.fail(f'Could not identify device function in:\n\n{ptx}') @@ -165,6 +166,7 @@ def caller(x): # Assume the end of the function is a line with an unindented '}' if line[:1] == '}': end = start + offset + break if end is None: self.fail(f'Could not identify end of device function in:\n\n{ptx}') @@ -176,6 +178,7 @@ def caller(x): for line in ptxlines[start:end]: if loc_directive.search(line) is not None: found = True + break if not found: # Join one line either side so the function as a whole is shown, From 5ecf0d4e3e155811a42b110555f0925e20a9d522 Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Mon, 24 Oct 2022 14:06:50 -0400 Subject: [PATCH 22/45] LLVM14 --- numba/core/callconv.py | 24 ++++++++++------------ numba/core/codegen.py | 23 +++++++++++++-------- numba/cuda/tests/cudapy/test_dispatcher.py | 6 +++--- numba/misc/llvm_pass_timings.py | 15 ++++++++++---- numba/tests/test_array_reductions.py | 5 +++++ numba/tests/test_debuginfo.py | 15 ++++++++++---- numba/tests/test_np_functions.py | 14 +++++++++++++ numba/tests/test_np_randomgen.py | 6 ++++++ numba/tests/test_parfors.py | 18 ++++------------ numba/tests/test_refop_pruning.py | 4 ++-- numba/tests/test_svml.py | 12 +---------- 11 files changed, 82 insertions(+), 60 deletions(-) diff --git a/numba/core/callconv.py b/numba/core/callconv.py index 9347b92b1f8..b69c189996f 100644 --- a/numba/core/callconv.py +++ b/numba/core/callconv.py @@ -9,7 +9,7 @@ from llvmlite import ir from numba.core import types, cgutils -from numba.core.base import PYOBJECT, GENERIC_POINTER +from numba.core.base import GENERIC_POINTER TryStatus = namedtuple('TryStatus', ['in_try', 'excinfo']) @@ -90,7 +90,7 @@ def return_native_none(self, builder): self._return_errcode_raw(builder, RETCODE_NONE) def return_exc(self, builder): - self._return_errcode_raw(builder, RETCODE_EXC, mark_exc=True) + self._return_errcode_raw(builder, RETCODE_EXC) def return_stop_iteration(self, builder): self._return_errcode_raw(builder, RETCODE_STOPIT) @@ -207,12 +207,12 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None, call_helper = self._get_call_helper(builder) exc_id = call_helper._add_exception(exc, exc_args, locinfo) - self._return_errcode_raw(builder, _const_int(exc_id), mark_exc=True) + 
self._return_errcode_raw(builder, _const_int(exc_id)) def return_status_propagate(self, builder, status): self._return_errcode_raw(builder, status.code) - def _return_errcode_raw(self, builder, code, mark_exc=False): + def _return_errcode_raw(self, builder, code): if isinstance(code, int): code = _const_int(code) builder.ret(code) @@ -395,7 +395,9 @@ def set_static_user_exc(self, builder, exc, exc_args=None, loc=None, exc = (exc, exc_args, locinfo) struct_gv = pyapi.serialize_object(exc) excptr = self._get_excinfo_argument(builder.function) - builder.store(struct_gv, excptr) + store = builder.store(struct_gv, excptr) + md = builder.module.add_metadata([ir.IntType(1)(1)]) + store.set_metadata("numba_exception_output", md) def return_user_exc(self, builder, exc, exc_args=None, loc=None, func_name=None): @@ -409,7 +411,7 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None, builder.branch(try_info['target']) else: # Return from the current function - self._return_errcode_raw(builder, RETCODE_USEREXC, mark_exc=True) + self._return_errcode_raw(builder, RETCODE_USEREXC) def _get_try_state(self, builder): try: @@ -457,14 +459,10 @@ def return_status_propagate(self, builder, status): excptr = self._get_excinfo_argument(builder.function) builder.store(status.excinfoptr, excptr) with builder.if_then(builder.not_(trystatus.in_try)): - self._return_errcode_raw(builder, status.code, mark_exc=True) + self._return_errcode_raw(builder, status.code) - def _return_errcode_raw(self, builder, code, mark_exc=False): - ret = builder.ret(code) - - if mark_exc: - md = builder.module.add_metadata([ir.IntType(1)(1)]) - ret.set_metadata("ret_is_raise", md) + def _return_errcode_raw(self, builder, code): + builder.ret(code) def _get_return_status(self, builder, code, excinfoptr): """ diff --git a/numba/core/codegen.py b/numba/core/codegen.py index e988fab1263..2d694901780 100644 --- a/numba/core/codegen.py +++ b/numba/core/codegen.py @@ -234,7 +234,7 @@ def init_digraph(name, fname, fontsize): nrt_meminfo = re.compile("@NRT_MemInfo") ll_intrin_calls = re.compile(r".*call.*@llvm\..*") ll_function_call = re.compile(r".*call.*@.*") - ll_raise = re.compile(r"ret i32.*\!ret_is_raise.*") + ll_raise = re.compile(r"store .*\!numba_exception_output.*") ll_return = re.compile("ret i32 [^1],?.*") # wrapper function for line wrapping LLVM lines @@ -1215,14 +1215,19 @@ def _module_pass_manager(self, **kwargs): # This knocks loops into rotated form early to reduce the likelihood # of vectorization failing due to unknown PHI nodes. pm.add_loop_rotate_pass() - # LLVM 11 added LFTR to the IV Simplification pass, this interacted - # badly with the existing use of the InstructionCombiner here and - # ended up with PHI nodes that prevented vectorization from - # working. The desired vectorization effects can be achieved - # with this in LLVM 11 (and also < 11) but at a potentially - # slightly higher cost: - pm.add_licm_pass() - pm.add_cfg_simplification_pass() + if ll.llvm_version_info[0] < 12: + # LLVM 11 added LFTR to the IV Simplification pass, + # this interacted badly with the existing use of the + # InstructionCombiner here and ended up with PHI nodes that + # prevented vectorization from working. 
The desired
+            # vectorization effects can be achieved with this in LLVM 11
+            # (and also < 11) but at a potentially slightly higher cost:
+            pm.add_licm_pass()
+            pm.add_cfg_simplification_pass()
+        else:
+            pm.add_instruction_combining_pass()
+            pm.add_jump_threading_pass()
+
         if config.LLVM_REFPRUNE_PASS:
             pm.add_refprune_pass(_parse_refprune_flags())
         return pm
diff --git a/numba/cuda/tests/cudapy/test_dispatcher.py b/numba/cuda/tests/cudapy/test_dispatcher.py
index 0cb427c7f6f..2e9bcd5a3e2 100644
--- a/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -312,9 +312,9 @@ def test_explicit_signatures_ambiguous_resolution(self):
         self.assertRegexpMatches(
             str(cm.exception),
             r"Ambiguous overloading for <function add [^>]*> "
-            r"\(array\(float64, 1d, C\), float64, float64\):\n"
-            r"\(array\(float64, 1d, C\), float32, float64\) -> none\n"
-            r"\(array\(float64, 1d, C\), float64, float32\) -> none"
+            r"\([Aa]rray\(float64, 1d?, [^)]*\), float64, float64\):\n"
+            r"\([Aa]rray\(float64, 1d?, [^)]*\), float32, float64\) -> none\n"
+            r"\([Aa]rray\(float64, 1d?, [^)]*\), float64, float32\) -> none"
         )
         # The integer signature is not part of the best matches
         self.assertNotIn("int64", str(cm.exception))
diff --git a/numba/misc/llvm_pass_timings.py b/numba/misc/llvm_pass_timings.py
index c9ebb8ec6b6..427d0e202fa 100644
--- a/numba/misc/llvm_pass_timings.py
+++ b/numba/misc/llvm_pass_timings.py
@@ -52,6 +52,7 @@ def get(self):
             "wall_time",
             "wall_percent",
             "pass_name",
+            "instruction",
         ],
     )
@@ -216,6 +217,7 @@ def parse(raw_data):
             "System Time": "system",
             "User+System": "user_system",
             "Wall Time": "wall",
+            "Instr": "instruction",
             "Name": "pass_name",
         }
         for ln in line_iter:
@@ -229,17 +231,22 @@ def parse(raw_data):
             assert headers[-1] == 'pass_name'
             # compute the list of available attributes from the column headers
             attrs = []
+            n = r"\s*((?:[0-9]+\.)?[0-9]+)"
+            pat = ""
             for k in headers[:-1]:
-                attrs.append(f"{k}_time")
-                attrs.append(f"{k}_percent")
+                if k == "instruction":
+                    pat += n
+                else:
+                    attrs.append(f"{k}_time")
+                    attrs.append(f"{k}_percent")
+                    pat += f"\\s+(?:{n}\\s*\\({n}%\\)|-+)"
+
             # put default value 0.0 to all missing attributes
             missing = {}
             for k in PassTimingRecord._fields:
                 if k not in attrs and k != 'pass_name':
                     missing[k] = 0.0
             # parse timings
-            n = r"\s*((?:[0-9]+\.)?[0-9]+)"
-            pat = f"\\s+(?:{n}\\s*\\({n}%\\)|-+)" * (len(headers) - 1)
             pat += r"\s*(.*)"
             for ln in line_iter:
                 m = re.match(pat, ln)
diff --git a/numba/tests/test_array_reductions.py b/numba/tests/test_array_reductions.py
index 36e5a67d7ec..414060e5d60 100644
--- a/numba/tests/test_array_reductions.py
+++ b/numba/tests/test_array_reductions.py
@@ -1000,6 +1000,8 @@ def assert_raises(arr, axis):
         assert_raises(arr1d, -2)
         assert_raises(arr2d, -3)
         assert_raises(arr2d, 2)
+        # Exceptions leak references
+        self.disable_leak_check()
 
     def test_argmax_axis_must_be_integer(self):
         arr = np.arange(6)
@@ -1073,6 +1075,9 @@ def assert_raises(arr, axis):
         assert_raises(arr2d, -3)
         assert_raises(arr2d, 2)
 
+        # Exceptions leak references
+        self.disable_leak_check()
+
     def test_argmin_axis_must_be_integer(self):
         arr = np.arange(6)
 
diff --git a/numba/tests/test_debuginfo.py b/numba/tests/test_debuginfo.py
index 1386021058b..a0e3326c31a 100644
--- a/numba/tests/test_debuginfo.py
+++ b/numba/tests/test_debuginfo.py
@@ -184,7 +184,7 @@ def test_DILocation(self):
         @njit(debug=True, error_model='numpy')
         def foo(a):
             b = a + 1.23
-            c = a * 2.34
+            c = b * 2.34
             d = b / c
             print(d)
             return d
@@ -223,9 +223,16 @@ def foo(a):
 
         # Find non-call instr
and check the sequence is as expected instrs = [x for x in block.instructions if x.opcode != 'call'] - op_seq = [x.opcode for x in instrs] - op_expect = ('fadd', 'fmul', 'fdiv') - self.assertIn(''.join(op_expect), ''.join(op_seq)) + op_expect = {'fadd', 'fmul', 'fdiv'} + started = False + for x in instrs: + if x.opcode in op_expect: + op_expect.remove(x.opcode) + if not started: + started = True + elif op_expect and started: + self.assertTrue(False, "Math opcodes are not contiguous") + self.assertFalse(op_expect, "Math opcodes were not found") # Parse out metadata from end of each line, check it monotonically # ascends with LLVM source line. Also store all the dbg references, diff --git a/numba/tests/test_np_functions.py b/numba/tests/test_np_functions.py index 1480c7579ab..df8dcd5597c 100644 --- a/numba/tests/test_np_functions.py +++ b/numba/tests/test_np_functions.py @@ -538,6 +538,8 @@ def test_sinc_exceptions(self): cfunc('str') self.assertIn('Argument "x" must be a Number or array-like', str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_contains(self): def arrs(): @@ -642,6 +644,8 @@ def test_angle_exceptions(self): cfunc('hello') self.assertIn('Argument "z" must be a complex or Array[complex]', str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_array_equal(self): def arrays(): @@ -769,6 +773,8 @@ def test_np_append_exceptions(self): 'The third argument "axis" must be an integer', str(raises.exception) ) + # Exceptions leak references + self.disable_leak_check() def test_delete(self): @@ -832,6 +838,8 @@ def test_delete_exceptions(self): 'obj must be less than the len(arr)', str(raises.exception), ) + # Exceptions leak references + self.disable_leak_check() def diff_arrays(self): """ @@ -885,6 +893,8 @@ def test_diff2_exceptions(self): with self.assertRaises(ValueError) as raises: cfunc(arr, n) self.assertIn("order must be non-negative", str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_isscalar(self): def values(): @@ -1089,6 +1099,8 @@ def test_bincount1_exceptions(self): cfunc([2, -1]) self.assertIn("first argument must be non-negative", str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_bincount2(self): pyfunc = bincount2 @@ -4866,6 +4878,8 @@ def not_literal_axis(a, i, axis): with self.assertRaises(ValueError) as raises: gen(0)(arr2d, np.ones((2, 3), dtype=np.uint64)) self.assertIn("dimensions don't match", str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_nan_to_num(self): # Test cases are from diff --git a/numba/tests/test_np_randomgen.py b/numba/tests/test_np_randomgen.py index 26bd4e8c197..8fd92fdb0a5 100644 --- a/numba/tests/test_np_randomgen.py +++ b/numba/tests/test_np_randomgen.py @@ -1094,6 +1094,8 @@ def test_noncentral_chisquare(self): curr_args[2] = -1 nb_dist_func(*curr_args) self.assertIn('nonc < 0', str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_noncentral_f(self): # For this test dtype argument is never used, so we pass [None] as dtype @@ -1138,6 +1140,8 @@ def test_noncentral_f(self): curr_args[3] = -1 nb_dist_func(*curr_args) self.assertIn('nonc < 0', str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() def test_logseries(self): # For this test dtype argument is never used, so we pass [None] as dtype @@ -1170,6 +1174,8 @@ def test_logseries(self): curr_args[1] = _p nb_dist_func(*curr_args) 
self.assertIn('p < 0, p >= 1 or p is NaN', str(raises.exception)) + # Exceptions leak references + self.disable_leak_check() class TestGeneratorCaching(TestCase, SerialMixin): diff --git a/numba/tests/test_parfors.py b/numba/tests/test_parfors.py index b232e30e367..4094db123b2 100644 --- a/numba/tests/test_parfors.py +++ b/numba/tests/test_parfors.py @@ -415,7 +415,7 @@ def _get_fast_instructions(ir): return fast_inst def _assert_fast(instrs): - ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp') + ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call') for inst in instrs: count = 0 for op in ops: @@ -4462,11 +4462,6 @@ def get_gufunc_asm(self, func, schedule_type, *args, **kwargs): return asm - # this is a common match pattern for something like: - # \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n - # to check vsqrtpd operates on zmm - match_vsqrtpd_on_zmm = re.compile('\n\s+vsqrtpd\s+.*zmm.*\n') - @linux_only def test_vectorizer_fastmath_asm(self): """ This checks that if fastmath is set and the underlying hardware @@ -4490,22 +4485,19 @@ def will_vectorize(A): fastmath=True) slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, fastmath=False) - for v in fast_asm.values(): # should unwind and call vector sqrt then vector add # all on packed doubles using zmm's self.assertTrue('vaddpd' in v) - self.assertTrue('vsqrtpd' in v) + self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v) self.assertTrue('zmm' in v) - # make sure vsqrtpd operates on zmm - self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) for v in slow_asm.values(): # vector variants should not be present self.assertTrue('vaddpd' not in v) self.assertTrue('vsqrtpd' not in v) # check scalar variant is present - self.assertTrue('vsqrtsd' in v) + self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v) self.assertTrue('vaddsd' in v) # check no zmm addressing is present self.assertTrue('zmm' not in v) @@ -4550,11 +4542,9 @@ def will_vectorize(A): for v in vec_asm.values(): # should unwind and call vector sqrt then vector mov # all on packed doubles using zmm's - self.assertTrue('vsqrtpd' in v) + self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v) self.assertTrue('vmovupd' in v) self.assertTrue('zmm' in v) - # make sure vsqrtpd operates on zmm - self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) @linux_only # needed as 32bit doesn't have equivalent signed/unsigned instruction diff --git a/numba/tests/test_refop_pruning.py b/numba/tests/test_refop_pruning.py index 04433bf80f5..6cd7fdafa7c 100644 --- a/numba/tests/test_refop_pruning.py +++ b/numba/tests/test_refop_pruning.py @@ -118,9 +118,9 @@ def func(n): raise ValueError return x - with set_refprune_flags('per_bb,fanout_raise'): + with set_refprune_flags('per_bb,fanout'): self.check(func, (types.intp), basicblock=True, diamond=False, - fanout=False, fanout_raise=True) + fanout=True, fanout_raise=False) class TestRefPruneFlags(TestCase): diff --git a/numba/tests/test_svml.py b/numba/tests/test_svml.py index 1822b2e3f0a..327de2938e8 100644 --- a/numba/tests/test_svml.py +++ b/numba/tests/test_svml.py @@ -73,13 +73,12 @@ "round": [], # round], "sind": [], "sinh": [np.sinh, math.sinh], - "sqrt": [np.sqrt, math.sqrt], "tan": [np.tan, math.tan], "tanh": [np.tanh, math.tanh], "trunc": [], # np.trunc, math.trunc], } # TODO: these functions are not vectorizable with complex types -complex_funcs_exclude = ["sqrt", "tan", "log10", "expm1", "log1p", "tanh", "log"] +complex_funcs_exclude = ["tan", "log10", "expm1", "log1p", "tanh", "log"] # remove untested 
entries svml_funcs = {k: v for k, v in svml_funcs.items() if len(v) > 0} @@ -130,15 +129,6 @@ def func_patterns(func, args, res, dtype, mode, vlen, fastmath, pad=' '*8): # generating the failsafe scalar paths if vlen != 8 and (is_f32 or dtype == 'int32'): # Issue #3016 avoids += ['%zmm', '__svml_%s%d%s,' % (f, v*2, prec_suff)] - # special handling - if func == 'sqrt': - if mode == "scalar": - contains = ['sqrts'] - avoids = [scalar_func, svml_func] # LLVM uses CPU instruction - elif vlen == 8: - contains = ['vsqrtp'] - avoids = [scalar_func, svml_func] # LLVM uses CPU instruction - # else expect use of SVML for older architectures return body, contains, avoids From f0e9adbbfd200a2dc88392ba87cfd51dcdc7252c Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Thu, 2 Feb 2023 13:57:40 -0500 Subject: [PATCH 23/45] DO NOT MERGE: LLVM14 TESTING INFRASTRUCTURE --- buildscripts/condarecipe.local/meta.yaml | 4 ++-- buildscripts/incremental/setup_conda_environment.cmd | 2 +- buildscripts/incremental/setup_conda_environment.sh | 2 +- numba/__init__.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/buildscripts/condarecipe.local/meta.yaml b/buildscripts/condarecipe.local/meta.yaml index 3b3232d457a..807e7d60f14 100644 --- a/buildscripts/condarecipe.local/meta.yaml +++ b/buildscripts/condarecipe.local/meta.yaml @@ -34,7 +34,7 @@ requirements: - setuptools - importlib_metadata # [py<39] # On channel https://anaconda.org/numba/ - - llvmlite >=0.40.0dev0,<0.40 + - llvmlite >=0.40.0dev0llvm14,<0.40 # TBB devel version is to match TBB libs. # 2020.3 is the last version with the "old" ABI # NOTE: 2021.1..2021.5 are API compatible for Numba's purposes. @@ -48,7 +48,7 @@ requirements: - setuptools - importlib_metadata # [py<39] # On channel https://anaconda.org/numba/ - - llvmlite >=0.40.0dev0,<0.40 + - llvmlite ==0.40.0dev0llvm14 run_constrained: # If TBB is present it must be at least version 2021 - tbb >=2021 # [not (aarch64 or ppc64le)] diff --git a/buildscripts/incremental/setup_conda_environment.cmd b/buildscripts/incremental/setup_conda_environment.cmd index 4518ec38547..4b382b0fa1c 100644 --- a/buildscripts/incremental/setup_conda_environment.cmd +++ b/buildscripts/incremental/setup_conda_environment.cmd @@ -25,7 +25,7 @@ conda create -n %CONDA_ENV% -q -y python=%PYTHON% numpy=%NUMPY% cffi pip scipy j call activate %CONDA_ENV% @rem Install latest llvmlite build -%CONDA_INSTALL% -c numba/label/dev llvmlite=0.40 +%CONDA_INSTALL% -c numba/label/dev "llvmlite=0.40.0dev0llvm14*" @rem Install required backports for older Pythons if %PYTHON% LSS 3.9 (%CONDA_INSTALL% importlib_metadata) @rem Install dependencies for building the documentation diff --git a/buildscripts/incremental/setup_conda_environment.sh b/buildscripts/incremental/setup_conda_environment.sh index 8a1adf666d5..dca44001852 100755 --- a/buildscripts/incremental/setup_conda_environment.sh +++ b/buildscripts/incremental/setup_conda_environment.sh @@ -62,7 +62,7 @@ elif [[ $(uname) == Darwin ]]; then fi # Install latest correct build -$CONDA_INSTALL -c numba/label/dev llvmlite=0.40 +$CONDA_INSTALL -c numba/label/dev "llvmlite=0.40.0dev0llvm14*" # Install importlib-metadata for Python < 3.9 if [ $PYTHON \< "3.9" ]; then $CONDA_INSTALL importlib_metadata; fi diff --git a/numba/__init__.py b/numba/__init__.py index 691e08c4b0e..3bd14fea801 100644 --- a/numba/__init__.py +++ b/numba/__init__.py @@ -143,7 +143,7 @@ def test(argv, **kwds): _min_llvmlite_version = (0, 40, 0) -_min_llvm_version = (11, 0, 0) +_min_llvm_version = (14, 0, 
0) def _ensure_llvm(): """ From 5b45803cefb4724046b2a7e10becba35a3fb90a1 Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Fri, 10 Feb 2023 16:40:46 -0500 Subject: [PATCH 24/45] Add additional refprune test As titled --- numba/tests/test_refop_pruning.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/numba/tests/test_refop_pruning.py b/numba/tests/test_refop_pruning.py index 6cd7fdafa7c..a0e8621d7d5 100644 --- a/numba/tests/test_refop_pruning.py +++ b/numba/tests/test_refop_pruning.py @@ -122,6 +122,22 @@ def func(n): self.check(func, (types.intp), basicblock=True, diamond=False, fanout=True, fanout_raise=False) + def test_fanout_3(self): + # fanout with raise + def func(n): + ary = np.arange(n) + # basically an impl of array.sum + c = 0 + # The raise is from StopIteration of next(iterator) implicit in + # the for loop + for v in np.nditer(ary): + c += v.item() + return 1 + + with set_refprune_flags('per_bb,fanout_raise'): + self.check(func, (types.intp), basicblock=True, diamond=False, + fanout=False, fanout_raise=True) + class TestRefPruneFlags(TestCase): def setUp(self): From 09d6fc1d45e5e32f5982bb00c0ba66e5892a8f1b Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 14 Feb 2023 10:29:57 +0000 Subject: [PATCH 25/45] Updates based on PR #7255 feedback - Fix a typo - Rename _static_cudalib_path() to _cuda_home_static_cudalib_path() so that its purpose is clearer. --- numba/cuda/cuda_paths.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numba/cuda/cuda_paths.py b/numba/cuda/cuda_paths.py index 5419ce8d1c2..d195bbc2947 100644 --- a/numba/cuda/cuda_paths.py +++ b/numba/cuda/cuda_paths.py @@ -69,7 +69,7 @@ def _cudalib_path(): return 'lib64' -def _static_cudalib_path(): +def _cuda_home_static_cudalib_path(): if IS_WIN32: return ('lib', 'x64') else: @@ -91,7 +91,7 @@ def _get_static_cudalib_dir_path_decision(): options = [ ('Conda environment', get_conda_ctk()), ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(*_static_cudalib_path())), + ('CUDA_HOME', get_cuda_home(*_cuda_home_static_cudalib_path())), ('System', get_system_ctk(_cudalib_path())), ] by, libdir = _find_valid_path(options) @@ -140,7 +140,7 @@ def get_nvidia_nvvm_ctk(): is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) if not is_conda_env: return - # Asssume the existence of NVVM to imply cudatoolkit installed + # Assume the existence of NVVM to imply cudatoolkit installed libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): return From 08c336649005e55b7bdb62493de25f579aa5f409 Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Wed, 15 Feb 2023 07:49:09 -0500 Subject: [PATCH 26/45] Use better assertion logic As titled. 
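For context: `self.fail(msg)` reports `msg` directly, whereas
`assertTrue(False, msg)` buries it behind a "False is not true" prefix. A
minimal sketch of the difference (a hypothetical standalone test, not part
of this patch):

```
import unittest


class Demo(unittest.TestCase):
    def test_old_style(self):
        # Reports: AssertionError: False is not true : opcodes not contiguous
        self.assertTrue(False, "opcodes not contiguous")

    def test_new_style(self):
        # Reports: AssertionError: opcodes not contiguous
        self.fail("opcodes not contiguous")
```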
Co-authored-by: stuartarchibald --- numba/tests/test_debuginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numba/tests/test_debuginfo.py b/numba/tests/test_debuginfo.py index a0e3326c31a..d306d5f46ef 100644 --- a/numba/tests/test_debuginfo.py +++ b/numba/tests/test_debuginfo.py @@ -231,7 +231,7 @@ def foo(a): if not started: started = True elif op_expect and started: - self.assertTrue(False, "Math opcodes are not contiguous") + self.fail("Math opcodes are not contiguous") self.assertFalse(op_expect, "Math opcodes were not found") # Parse out metadata from end of each line, check it monotonically From 0dafa706cd6cd03299596382547f05f6fdf8ddb0 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 31 Jan 2023 12:26:50 +0000 Subject: [PATCH 27/45] CUDA: Fix performance regression when lineinfo is enabled Enabling lineinfo on CUDA would result in correct lineinfo being generated but introduced performance regressions by preventing inlining of device functions (reported by @fs-nv). This commit rectifies the issue by allowing inlining to be performed when only lineinfo is requested. The actual change is made to the code that was in `DIBuilder.mark_subprogram()` but this is moved to `BaseLower.pre_lower()` - the original position of this code violated reasonable abstraction because it meant that calling `DIBuilder.mark_subprogram()` actually changed the code being marked. This requires corresponding changes to `test_lineinfo_in_device_function()`. The test was looking for lineinfo in a separate device function, but instead it needs to search for lineinfo pertaining to an inlined device function. Additionally, it also disallowed the emission of any section named `.debug` - however, when device functions are inlined, a section named `.debug_str` is emitted to hold the names of the inlined functions. Therefore, we instead disallow a section named `.debug_info`. --- numba/core/debuginfo.py | 5 --- numba/core/lowering.py | 8 +++++ numba/cuda/tests/cudapy/test_lineinfo.py | 43 +++++++----------------- 3 files changed, 20 insertions(+), 36 deletions(-) diff --git a/numba/core/debuginfo.py b/numba/core/debuginfo.py index 53a7927f03c..7970fc8e626 100644 --- a/numba/core/debuginfo.py +++ b/numba/core/debuginfo.py @@ -322,11 +322,6 @@ def mark_subprogram(self, function, qualname, argnames, argtypes, line): argmap=argmap) function.set_metadata("dbg", di_subp) - # Don't marked alwaysinline functions as noinline. - if 'alwaysinline' not in function.attributes: - # disable inlining for this function for easier debugging - function.attributes.add('noinline') - def finalize(self): dbgcu = cgutils.get_or_insert_named_metadata(self.module, self.DBG_CU_NAME) dbgcu.add(self.dicompileunit) diff --git a/numba/core/lowering.py b/numba/core/lowering.py index 252782e5bfd..8f688b3630c 100644 --- a/numba/core/lowering.py +++ b/numba/core/lowering.py @@ -123,6 +123,14 @@ def pre_lower(self): argtypes=self.fndesc.argtypes, line=self.defn_loc.line) + # When full debug info is enabled, disable inlining where possible, to + # improve the quality of the debug experience. 'alwaysinline' functions + # cannot have inlining disabled. 
+        attributes = self.builder.function.attributes
+        full_debug = self.flags.debuginfo and not self.flags.dbg_directives_only
+        if full_debug and 'alwaysinline' not in attributes:
+            attributes.add('noinline')
+
     def post_lower(self):
         """
         Called after all blocks are lowered
diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py
index de3dc823357..6a24458bffc 100644
--- a/numba/cuda/tests/cudapy/test_lineinfo.py
+++ b/numba/cuda/tests/cudapy/test_lineinfo.py
@@ -66,7 +66,7 @@ def _check(self, fn, sig, expect):
         # generated
         pat = (
             r'\.section\s+'         # .section directive beginning
-            r'\.debug_'             # Any section name beginning ".debug_"
+            r'\.debug_info'         # Section named ".debug_info"
         )
         match = re.compile(pat).search(ptx)
         self.assertIsNone(match, msg=ptx)
@@ -141,51 +141,32 @@ def caller(x):
         ptx = caller.inspect_asm(sig)
         ptxlines = ptx.splitlines()
 
-        # To check the device function, we need to identify its boundaries.
+        # Check that there is no device function in the PTX
 
-        # A line beginning with ".weak .func"
+        # A line beginning with ".weak .func" that identifies a device function
         devfn_start = re.compile(r'^\.weak\s*\.func')
 
-        # Identify the beginning of the function.
-        start = None
-
-        for lineno, line in enumerate(ptxlines):
+        for line in ptxlines:
             if devfn_start.match(line) is not None:
-                # We will begin our search on the line following the
-                # declaration
-                start = lineno + 1
-                break
-
-        if start is None:
-            self.fail(f'Could not identify device function in:\n\n{ptx}')
-
-        # Identify the end of the function
-        end = None
-
-        for offset, line in enumerate(ptxlines[start:]):
-            # Assume the end of the function is a line with an unindented '}'
-            if line[:1] == '}':
-                end = start + offset
-                break
+                self.fail(f"Found device function in PTX:\n\n{ptx}")
 
-        if end is None:
-            self.fail(f'Could not identify end of device function in:\n\n{ptx}')
+        # Scan for .loc directives that refer to an inlined device function
 
-        # Scan for .loc directives in the device function.
         loc_directive = self._loc_directive_regex()
         found = False
 
-        for line in ptxlines[start:end]:
+        for line in ptxlines:
             if loc_directive.search(line) is not None:
-                found = True
-                break
+                if 'inlined_at' in line:
+                    found = True
+                    break
 
         if not found:
-            # Join one line either side so the function as a whole is shown,
-            # i.e. including the declaration and parameter list, and the
-            # closing brace.
-            devfn = "\n".join(ptxlines[start - 1:end + 1])
-            self.fail(f'.loc directive not found in:\n\n{devfn}')
+            self.fail(f'No .loc directive with inlined_at info found '
+                      f'in:\n\n{ptx}')
 
         # We also inspect the LLVM to ensure that there's debug info for each
         # subprogram (function). A lightweight way to check this is to ensure

From 3c2cedcb28de18471843a89c57d3453a18f98e8b Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Mon, 20 Feb 2023 15:12:23 +0000
Subject: [PATCH 28/45] Update vendored Versioneer from 0.14 to 0.28

This is primarily done to remove its distutils dependency.

Per the upgrade notes (see
https://github.com/python-versioneer/python-versioneer/blob/master/UPGRADING.md),
the configuration of Versioneer is moved into `setup.cfg` from
`setup.py`. The other notes on upgrading between Versions 0.14 and 0.28
don't apply to our particular configuration as we're not presently using
pyproject.toml.

`versioneer.py` is also added to the Flake8 ignore list, as its format
no longer conforms to our rules.
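For reference, the computed version can be sanity-checked from a git
checkout after the upgrade; a minimal check (assuming a working tree with
tags available) is:

```
from numba._version import get_versions

# Expect a PEP 440 version string plus revision/dirty/date metadata, e.g.
# {'version': '...', 'full-revisionid': '...', 'dirty': False, ...}
print(get_versions())
```

`python setup.py version` (via the cmdclass from `versioneer.get_cmdclass()`)
should report the same string.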
--- .flake8 | 1 + numba/_version.py | 678 ++++++++++--- setup.cfg | 13 + setup.py | 6 - versioneer.py | 2465 +++++++++++++++++++++++++++++++++------------ 5 files changed, 2376 insertions(+), 787 deletions(-) create mode 100644 setup.cfg diff --git a/.flake8 b/.flake8 index deba6b9e0cf..34dc65eda7c 100644 --- a/.flake8 +++ b/.flake8 @@ -29,6 +29,7 @@ exclude = __init__.py # Ignore vendored files numba/cloudpickle/* + versioneer.py # Grandfather in existing failing files. This list should shrink over time numba/stencils/stencil.py numba/core/transforms.py diff --git a/numba/_version.py b/numba/_version.py index be5315b7bb2..decd3ca772b 100644 --- a/numba/_version.py +++ b/numba/_version.py @@ -1,106 +1,198 @@ + # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.14 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.28 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" import errno import os import re import subprocess import sys +from typing import Callable, Dict +import functools + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "numba-" + cfg.versionfile_source = "numba/_version.py" + cfg.verbose = False + return cfg -# these strings will be replaced by git during git-archive -git_refnames = "$Format:%d$" -git_full = "$Format:%H$" -# these strings are filled in when 'setup.py versioneer' creates _version.py -tag_prefix = "" -parentdir_prefix = "numba-" -versionfile_source = "numba/_version.py" +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: - print("unable to run %s" % args[0]) + print("unable to run %s" % dispcmd) print(e) - return None + return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) - return None - stdout = p.communicate()[0].strip().decode() - if p.returncode != 0: + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: - print("unable to run %s (error)" % args[0]) - return None - return stdout + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level - -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. - dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") +@register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. 
+ date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -109,128 +201,458 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) + print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. - # dirty + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" # now we have TAG-NUM-gHEX or HEX - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- return "0+unparseable"+dirty_suffix, dirty + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] + # commit: short hex revision ID + pieces["short"] = mo.group(3) - # distance: number of commits since tag - distance = int(mo.group(2)) + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered - # commit: short hex revision ID - commit = mo.group(3) - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%d.g%s" % (distance, commit) + dirty_suffix +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. - return version, dirty + Like 'git describe --tags --dirty --always'. + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %s" % root) - return {} # get_versions() will try next method +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) - return {"version": version, "full": full} + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} -def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): +def get_versions(): + """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. 
- keywords = {"refnames": git_refnames, "full": git_full} - ver = git_versions_from_keywords(keywords, tag_prefix, verbose) - if ver: - return ver + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: - return default + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass - return (git_versions_from_vcs(tag_prefix, root, verbose) - or versions_from_parentdir(parentdir_prefix, root, verbose) - or default) + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000000..d5f7ac93773 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,13 @@ + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +VCS = git +style = pep440 +versionfile_source = numba/_version.py +versionfile_build = numba/_version.py +tag_prefix = +parentdir_prefix = numba- + diff --git a/setup.py b/setup.py index 6a132b58ab0..c4d92705677 100644 --- a/setup.py +++ b/setup.py @@ -57,12 +57,6 @@ def run(self): spawn(['make', '-C', 'docs', 'html']) -versioneer.VCS = 'git' -versioneer.versionfile_source = 'numba/_version.py' -versioneer.versionfile_build = 'numba/_version.py' -versioneer.tag_prefix = '' -versioneer.parentdir_prefix = 'numba-' - cmdclass = versioneer.get_cmdclass() cmdclass['build_doc'] = build_doc diff --git a/versioneer.py b/versioneer.py index c00770fe4ff..18e34c2f535 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,23 +1,20 @@ -# Version: 0.14 +# Version: 0.28 + +"""The Versioneer - like a rocketeer, but for versions. -""" The Versioneer ============== * like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer +* https://github.com/python-versioneer/python-versioneer * Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. 
Making a new release should be as easy as recording a new tag in your version-control @@ -26,9 +23,38 @@ ## Quick Install -* `pip install versioneer` to somewhere to your $PATH -* run `versioneer-installer` in your source tree: this installs `versioneer.py` -* follow the instructions below (also in the `versioneer.py` docstring) +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. The experimental build-time dependency mode +is intended to allow you to skip this step and simplify the process of upgrading. + +### Vendored mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) + * Note that you will need to add `tomli; python_version < "3.11"` to your + build-time dependencies if you use `pyproject.toml` +* run `versioneer install --vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +### Build-time dependency mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) +* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) + to the `requires` key of the `build-system` table in `pyproject.toml`: + ```toml + [build-system] + requires = ["setuptools", "versioneer[toml]"] + build-backend = "setuptools.build_meta" + ``` +* run `versioneer install --no-vendor` in your source tree, commit the results +* verify version information with `python setup.py version` ## Version Identifiers @@ -57,10 +83,10 @@ enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, -for example 'git describe --tags --dirty --always' reports things like +for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. +uncommitted changes). The version identifier is used for multiple purposes: @@ -71,194 +97,175 @@ Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. However, -when you use "setup.py build" or "setup.py sdist", `_version.py` in the new -copy is replaced by a small static file that contains just the generated -version data. +dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name -during the "git archive" command. As a result, generated tarballs will +during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. 
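To make the keyword mechanism concrete, here is a minimal illustrative sketch (not part of the patch): the placeholder names below match the `get_keywords()` helper that appears later in this diff, and the `.gitattributes` path assumes this repository's `versionfile_source` from the new `setup.cfg`.

```python
# Illustrative sketch of the git-archive keyword mechanism.
#
# The installer marks the version file for keyword substitution in
# .gitattributes:
#
#     numba/_version.py export-subst
#
# In a normal checkout the placeholders below remain verbatim; in a tarball
# produced by `git archive`, git expands them to the real values.
git_refnames = "$Format:%d$"   # ref names pointing at the archived commit
git_full = "$Format:%H$"       # full commit hash
git_date = "$Format:%ci$"      # commit date, ISO-8601-like
```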
+To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. ## Installation -First, decide on values for the following configuration variables: +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. -* `VCS`: the version control system you use. Currently accepts "git". +## Version-String Flavors -* `versionfile_source`: +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. - A project-relative pathname into which the generated version strings should - be written. This is usually a `_version.py` next to your project's main - `__init__.py` file, so it can be imported at runtime. If your project uses - `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. - This file should be checked in to your VCS as usual: the copy created below - by `setup.py versioneer` will include code that parses expanded VCS - keywords in generated tarballs. The 'build' and 'sdist' commands will - replace it with a copy that has just the calculated version string. +Both functions return a dictionary with different flavors of version +information: - This must be set even if your project does not have any modules (and will - therefore never import `_version.py`), since "setup.py sdist" -based trees - still need somewhere to record the pre-calculated version strings. Anywhere - in the source tree should do. If there is a `__init__.py` next to your - `_version.py`, the `setup.py versioneer` command (described below) will - append some `__version__`-setting assignments, if they aren't already - present. +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. -* `versionfile_build`: +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - Like `versionfile_source`, but relative to the build directory instead of - the source directory. These will differ when your setup.py uses - 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, - then you will probably have `versionfile_build='myproject/_version.py'` and - `versionfile_source='src/myproject/_version.py'`. +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. - If this is set to None, then `setup.py build` will not attempt to rewrite - any `_version.py` in the built tree. If your project does not have any - libraries (e.g. if it only builds a script), then you should use - `versionfile_build = None` and override `distutils.command.build_scripts` - to explicitly insert a copy of `versioneer.get_version()` into your - generated script. 
+* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None -* `tag_prefix`: +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". - a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. - If your tags look like 'myproject-1.2.0', then you should use - tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this - should be an empty string. +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. -* `parentdir_prefix`: +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: - a string, frequently the same as tag_prefix, which appears at the start of - all unpacked tarball filenames. If your tarball unpacks into - 'myproject-1.2.0', this should be 'myproject-'. + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions -This tool provides one script, named `versioneer-installer`. That script does -one thing: write a copy of `versioneer.py` into the current directory. +## Styles -To versioneer-enable your project: +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. -* 1: Run `versioneer-installer` to copy `versioneer.py` into the top of your - source tree. +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". -* 2: add the following lines to the top of your `setup.py`, with the - configuration values you decided earlier: +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. - ```` - import versioneer - versioneer.VCS = 'git' - versioneer.versionfile_source = 'src/myproject/_version.py' - versioneer.versionfile_build = 'myproject/_version.py' - versioneer.tag_prefix = '' # tags are like 1.2.0 - versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0' - ```` +## Debugging -* 3: add the following arguments to the setup() call in your setup.py: +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". 
To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).

-    version=versioneer.get_version(),
-    cmdclass=versioneer.get_cmdclass(),

+## Known Limitations

-* 4: now run `setup.py versioneer`, which will create `_version.py`, and will
-  modify your `__init__.py` (if one exists next to `_version.py`) to define
-  `__version__` (by calling a function from `_version.py`). It will also
-  modify your `MANIFEST.in` to include both `versioneer.py` and the generated
-  `_version.py` in sdist tarballs.
+Some situations are known to cause problems for Versioneer. This details the
+most significant ones. More can be found on Github
+[issues page](https://github.com/python-versioneer/python-versioneer/issues).

-* 5: commit these changes to your VCS. To make sure you won't forget,
-  `setup.py versioneer` will mark everything it touched for addition.
+### Subprojects

-## Post-Installation Usage
+Versioneer has limited support for source trees in which `setup.py` is not in
+the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
+two common reasons why `setup.py` might not be in the root:

-Once established, all uses of your tree from a VCS checkout should get the
-current version string. All generated tarballs should include an embedded
-version string (so users who unpack them will not need a VCS tool installed).
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.

-If you distribute your project through PyPI, then the release process should
-boil down to two steps:
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).

-* 1: git tag 1.0
-* 2: python setup.py register sdist upload
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.

-If you distribute it through github (i.e. users use github to generate
-tarballs with `git archive`), the process is:
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.

-* 1: git tag 1.0
-* 2: git push; git push --tags
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.

-Currently, all version strings must be based upon a tag. Versioneer will
-report "unknown" until your tree has at least one tag in its history. This
-restriction will be fixed eventually (see issue #12). 
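To picture the subproject situation described above, a hypothetical source layout (all names invented, modeled on the Buildbot example) might look like:

```
repo/                  <- .git/ lives here, at the true root
├── master/
│   ├── setup.py       <- Versioneer walks upwards from here to find ../.git
│   ├── setup.cfg
│   └── versioneer.py
└── slave/
    └── setup.py
```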
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. -## Version-String Flavors +### Editable installs with setuptools <= 18.5 -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. -Both functions return a dictionary with different keys for different flavors -of the version string: - -* `['version']`: A condensed PEP440-compliant string, equal to the - un-prefixed tag name for actual releases, and containing an additional - "local version" section with more detail for in-between builds. For Git, - this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe - --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates - that the tree is like the "1076c97" commit but has uncommitted changes - (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" - tag. For released software (exactly equal to a known tag), the identifier - will only contain the stripped tag, e.g. "0.11". - -* `['full']`: detailed revision identifier. For Git, this is the full SHA1 - commit id, followed by ".dirty" if the tree contains uncommitted changes, - e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty". - -Some variants are more useful than others. Including `full` in a bug report -should allow developers to reconstruct the exact code being tested (or -indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. -The `setup.py versioneer` command adds the following text to your -`__init__.py` to place a basic version in `YOURPROJECT.__version__`: +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. 
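For concreteness, here is a minimal sketch of a `setup.py` that combines Versioneer with an entry-point script; the project and module names are invented for illustration.

```python
# setup.py -- minimal sketch; "myproject" and "myproject.cli" are invented
# names, not part of this patch.
from setuptools import setup

import versioneer

setup(
    name="myproject",
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    entry_points={
        "console_scripts": [
            # Installs a `myproject` executable that imports myproject.cli
            # and calls its main() function.
            "myproject = myproject.cli:main",
        ],
    },
)
```

Re-installing the package (so that the egg_info data matches the checked-out version again) is what clears the `pkg_resources.DistributionNotFound` error described above.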
- from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) -* re-run `versioneer-installer` in your source tree to replace your copy of - `versioneer.py` -* edit `setup.py`, if necessary, to include any new configuration settings - indicated by the release notes -* re-run `setup.py versioneer` to replace `SRC/_version.py` +* edit `setup.cfg` and `pyproject.toml`, if necessary, + to include any new configuration settings indicated by the release notes. + See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install --[no-]vendor` in your source tree, to replace + `SRC/_version.py` * commit any changed files -### Upgrading from 0.10 to 0.11 - -You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running -`setup.py versioneer`. This will enable the use of additional version-control -systems (SVN, etc) in the future. - -### Upgrading from 0.11 to 0.12 - -Nothing special. - -## Upgrading to 0.14 - -0.14 changes the format of the version string. 0.13 and earlier used -hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a -plus-separated "local version" section strings, with dot-separated -components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old -format, but should be ok with the new one. - ## Future Directions This tool is designed to make it easily extended to other version-control @@ -272,172 +279,393 @@ direction and include code from all supported VCS systems, reducing the number of intermediate scripts. +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin ## License -To make Versioneer easier to embed, all its code is hereby released into the -public domain. The `_version.py` that it creates is also in the public -domain. +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the "Unlicense", as described in +https://unlicense.org/. 
+ +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments +import configparser import errno +import json import os import re import subprocess import sys -from distutils.command.build import build as _build -from distutils.command.sdist import sdist as _sdist -from distutils.core import Command - -# these configuration settings will be overridden by setup.py after it -# imports us -versionfile_source = None -versionfile_build = None -tag_prefix = None -parentdir_prefix = None -VCS = None +from pathlib import Path +from typing import Callable, Dict +import functools + +have_tomllib = True +if sys.version_info >= (3, 11): + import tomllib +else: + try: + import tomli as tomllib + except ImportError: + have_tomllib = False + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. 
+ my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + root = Path(root) + pyproject_toml = root / "pyproject.toml" + setup_cfg = root / "setup.cfg" + section = None + if pyproject_toml.exists() and have_tomllib: + try: + with open(pyproject_toml, 'rb') as fobj: + pp = tomllib.load(fobj) + section = pp['tool']['versioneer'] + except (tomllib.TOMLDecodeError, KeyError): + pass + if not section: + parser = configparser.ConfigParser() + with open(setup_cfg) as cfg_file: + parser.read_file(cfg_file) + parser.get("versioneer", "VCS") # raise error if missing + + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = section['VCS'] + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""', None): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + # these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: - print("unable to run %s" % args[0]) + print("unable to run %s" % dispcmd) print(e) - return None + return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) 
- return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: - print("unable to run %s (error)" % args[0]) - return None - return stdout -LONG_VERSION_PY['git'] = ''' + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.14 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.28 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" import errno import os import re import subprocess import sys +from typing import Callable, Dict +import functools + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg -# these strings will be replaced by git during git-archive -git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" -git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" -# these strings are filled in when 'setup.py versioneer' creates _version.py -tag_prefix = "%(TAG_PREFIX)s" -parentdir_prefix = "%(PARENTDIR_PREFIX)s" -versionfile_source = "%(VERSIONFILE_SOURCE)s" +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # 
This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: - print("unable to run %%s" %% args[0]) + print("unable to run %%s" %% dispcmd) print(e) - return None + return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) - return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: - print("unable to run %%s (error)" %% args[0]) - return None - return stdout - + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. - dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%%s', but '%%s' doesn't start with " - "prefix '%%s'" %% (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") +@register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
keywords = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d @@ -446,170 +674,520 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: - print("discarding '%%s', no digits" %% ",".join(refs-tags)) + print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %%s" %% r) return {"version": r, - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + pieces["branch"] = branch_name -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out - # dirty + # look for -dirty suffix dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" # now we have TAG-NUM-gHEX or HEX - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - return "0+unparseable"+dirty_suffix, dirty + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] + # commit: short hex revision ID + pieces["short"] = mo.group(3) - # distance: number of commits since tag - distance = int(mo.group(2)) + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered - # commit: short hex revision ID - commit = mo.group(3) - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. - return version, dirty + Like 'git describe --tags --dirty --always'. + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %%s" %% root) - return {} # get_versions() will try next method +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) - return {"version": version, "full": full} + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} -def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): +def get_versions(): + """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. - keywords = {"refnames": git_refnames, "full": git_full} - ver = git_versions_from_keywords(keywords, tag_prefix, verbose) - if ver: - return ver + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: - return default + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass - return (git_versions_from_vcs(tag_prefix, root, verbose) - or versions_from_parentdir(parentdir_prefix, root, verbose) - or default) + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} ''' +@register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. 
This function is not used from # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -618,429 +1196,1010 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) + print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", - "full": keywords["full"].strip()} + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. - # dirty - dirty = git_describe.endswith("-dirty") - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] - # now we have TAG-NUM-gHEX or HEX + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - return "0+unparseable"+dirty_suffix, dirty + pieces["branch"] = branch_name - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out - # distance: number of commits since tag - distance = int(mo.group(2)) + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] - # commit: short hex revision ID - commit = mo.group(3) + # now we have TAG-NUM-gHEX or HEX - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%d.g%s" % (distance, commit) + dirty_suffix + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] - return version, dirty + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + # commit: short hex revision ID + pieces["short"] = mo.group(3) -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %s" % root) - return {} # get_versions() will try next method + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. 
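As an aside, the describe parsing just shown can be exercised on its own; the describe string below is invented:

```
import re

# Parsing 'git describe --tags --dirty --always --long' output the way
# git_pieces_from_vcs() does; the input string is invented.
describe_out = "1.2.3-3-gabc1234-dirty"

dirty = describe_out.endswith("-dirty")
if dirty:
    describe_out = describe_out[:describe_out.rindex("-dirty")]

mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', describe_out)
full_tag, distance, short = mo.group(1), int(mo.group(2)), mo.group(3)
print(full_tag, distance, short, dirty)  # 1.2.3 3 abc1234 True
```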
Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" + return pieces - return {"version": version, "full": full} +def do_vcs_install(versionfile_source, ipy): + """Git-specific installation logic for Versioneer. -def do_vcs_install(manifest_in, versionfile_source, ipy): + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] + files = [versionfile_source] if ipy: files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith((".pyc", ".pyo")): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: pass if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. - dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.14) from +# This file was generated by 'versioneer.py' (0.28) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -version_version = '%(version)s' -version_full = '%(full)s' -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} +import json + +version_json = ''' +%s +''' # END VERSION_JSON -""" -DEFAULT = {"version": "0+unknown", "full": "unknown"} +def get_versions(): + return json.loads(version_json) +""" def versions_from_file(filename): - versions = {} + """Try to determine the version from _version.py if present.""" try: with open(filename) as f: - for line in f.readlines(): - mo = re.match("version_version = '([^']+)'", line) - if mo: - versions["version"] = mo.group(1) - mo = re.match("version_full = '([^']+)'", line) - if mo: - versions["full"] = mo.group(1) - except EnvironmentError: - return {} - - return versions + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % versions) + f.write(SHORT_VERSION_PY % contents) print("set %s to '%s'" % (filename, versions["version"])) -def get_root(): - try: - return os.path.dirname(os.path.abspath(__file__)) - except NameError: - return os.path.dirname(os.path.abspath(sys.argv[0])) +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" -def vcs_function(vcs, suffix): - return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None) +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] -def get_versions(default=DEFAULT, verbose=False): - # returns dict with two keys: 'version' and 'full' - assert versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert tag_prefix is not None, "please set versioneer.tag_prefix" - assert parentdir_prefix is not None, \ - "please set versioneer.parentdir_prefix" - assert VCS is not None, "please set versioneer.VCS" - - # I am in versioneer.py, which must live at the top of the source tree, - # which we use to compute the root directory. py2exe/bbfreeze/non-CPython - # don't have __file__, in which case we fall back to sys.argv[0] (which - # ought to be the setup.py script). We prefer __file__ since that's more - # robust in cases where setup.py was invoked in some weird way (e.g. pip) root = get_root() - versionfile_abs = os.path.join(root, versionfile_source) + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) - # extract version from first of _version.py, VCS command (e.g. 'git + # extract version from first of: _version.py, VCS command (e.g. 'git # describe'), parentdir. This is meant to work for developers using a # source checkout, for users of a tarball created by 'setup.py sdist', # and for users of a tarball/zipball created by 'git archive' or github's # download-from-tag feature or the equivalent in other VCSes. 
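The fallback chain described in that comment is driven by the `NotThisMethod` exception; a minimal sketch of the control flow, with a helper name invented for illustration:

```
class NotThisMethod(Exception):
    """Raised by a version source that does not apply in this situation."""

def first_source_that_works(sources):
    # Sketch of the control flow below: try each version source in order;
    # a source either returns a versions dict or raises NotThisMethod.
    for source in sources:
        try:
            return source()
        except NotThisMethod:
            continue
    return {"version": "0+unknown", "full-revisionid": None, "dirty": None,
            "error": "unable to compute version", "date": None}
```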
- get_keywords_f = vcs_function(VCS, "get_keywords") - versions_from_keywords_f = vcs_function(VCS, "versions_from_keywords") - if get_keywords_f and versions_from_keywords_f: - vcs_keywords = get_keywords_f(versionfile_abs) - ver = versions_from_keywords_f(vcs_keywords, tag_prefix) - if ver: + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) if verbose: print("got version from expanded keyword %s" % ver) return ver + except NotThisMethod: + pass - ver = versions_from_file(versionfile_abs) - if ver: + try: + ver = versions_from_file(versionfile_abs) if verbose: print("got version from file %s %s" % (versionfile_abs, ver)) return ver + except NotThisMethod: + pass - versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs") - if versions_from_vcs_f: - ver = versions_from_vcs_f(tag_prefix, root, verbose) - if ver: + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) if verbose: print("got version from VCS %s" % ver) return ver + except NotThisMethod: + pass - ver = versions_from_parentdir(parentdir_prefix, root, verbose) - if ver: - if verbose: - print("got version from parentdir %s" % ver) - return ver + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass if verbose: - print("got version from default %s" % default) - return default + print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass=None): + """Get the custom setuptools subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. 
+ # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to setuptools + from setuptools import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + def finalize_options(self): + pass -def get_version(verbose=False): - return get_versions(verbose=verbose)["version"] + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + else: + from setuptools.command.build_py import build_py as _build_py + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + else: + from setuptools.command.build_ext import build_ext as _build_ext -class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if not cfg.versionfile_build: + return + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print(f"Warning: {target_versionfile} does not exist, skipping " + "version update. 
This can happen if you are running build_ext " + "without first running build_py.") + return + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? + try: + from py2exe.setuptools_buildexe import py2exe as _py2exe + except ImportError: + from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # sdist farms its file list building out to egg_info + if 'egg_info' in cmds: + _egg_info = cmds['egg_info'] + else: + from setuptools.command.egg_info import egg_info as _egg_info + + class cmd_egg_info(_egg_info): + def find_sources(self): + # egg_info.find_sources builds the manifest list and writes it + # in one shot + super().find_sources() + + # Modify the filelist and normalize it + root = get_root() + cfg = get_config_from_root(root) + self.filelist.append('versioneer.py') + if cfg.versionfile_source: + # There are rare cases where versionfile_source might not be + # included by default, so we must be explicit + self.filelist.append(cfg.versionfile_source) + self.filelist.sort() + self.filelist.remove_duplicates() + + # The write method is hidden in the manifest_maker instance that + # generated the filelist and was thrown away + # We will instead replicate their final normalization (to unicode, + # and POSIX-style paths) + from setuptools import unicode_utils + normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') + for f in self.filelist.files] + + manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') + with open(manifest_filename, 'w') as fobj: + fobj.write('\n'.join(normalized)) + + cmds['egg_info'] = cmd_egg_info + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + else: + from setuptools.command.sdist import sdist as _sdist 
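A side note on the `LONG % {...}` writes in the freezer commands above (and in `do_setup()` below): `LONG_VERSION_PY` is itself a %-template, which is why the embedded `_version.py` earlier in this patch doubles every percent sign. A toy version of the substitution:

```
# Toy version of the LONG_VERSION_PY substitution: %(KEY)s placeholders are
# filled in, and every %% collapses to a literal % in the written file.
LONG = 'git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"  # style: %(STYLE)s'
print(LONG % {"DOLLAR": "$", "STYLE": "pep440"})
# -> git_refnames = "$Format:%d$"  # style: pep440
```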
- def initialize_options(self): - pass + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist - def finalize_options(self): - pass + return cmds - def run(self): - ver = get_version(verbose=True) - print("Version is currently: %s" % ver) +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: -class cmd_build(_build): - def run(self): - versions = get_versions(verbose=True) - _build.run(self) - # now locate _version.py in the new build/ directory and replace it - # with an updated value - if versionfile_build: - target_versionfile = os.path.join(self.build_lib, - versionfile_build) - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % versions) + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- -if 'cx_Freeze' in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe +You will also need to edit your setup.py to use the results: - class cmd_build_exe(_build_exe): - def run(self): - versions = get_versions(verbose=True) - target_versionfile = versionfile_source - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(versionfile_source, "w") as f: - assert VCS is not None, "please set versioneer.VCS" - LONG = LONG_VERSION_PY[VCS] - f.write(LONG % {"DOLLAR": "$", - "TAG_PREFIX": tag_prefix, - "PARENTDIR_PREFIX": parentdir_prefix, - "VERSIONFILE_SOURCE": versionfile_source, - }) - - -class cmd_sdist(_sdist): - def run(self): - versions = get_versions(verbose=True) - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory (remembering - # that it may be a hardlink) and replace it with an updated value - target_versionfile = os.path.join(base_dir, versionfile_source) - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % self._versioneer_generated_versions) + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) 
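The error text above doubles as the canonical wiring; as a runnable sketch (the project name is illustrative):

```
# Minimal setup.py wiring, as prescribed by CONFIG_ERROR; "myproject" is
# illustrative. get_cmdclass() can also wrap an existing cmdclass mapping.
import versioneer
from setuptools import setup

setup(
    name="myproject",
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
)
```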
-INIT_PY_SNIPPET = """ +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ +INIT_PY_SNIPPET = """ +from . import {0} +__version__ = {0}.get_versions()['version'] +""" -class cmd_update_files(Command): - description = ("install/upgrade Versioneer files: " - "__init__.py SRC/_version.py") - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - def run(self): - print(" creating %s" % versionfile_source) - with open(versionfile_source, "w") as f: - assert VCS is not None, "please set versioneer.VCS" - LONG = LONG_VERSION_PY[VCS] - f.write(LONG % {"DOLLAR": "$", - "TAG_PREFIX": tag_prefix, - "PARENTDIR_PREFIX": parentdir_prefix, - "VERSIONFILE_SOURCE": versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(get_root(), "MANIFEST.in") - simple_includes = set() +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. 
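For concreteness, here is what the new `INIT_PY_SNIPPET` above renders to; the `versionfile_source` value is illustrative:

```
INIT_PY_SNIPPET = """
from . import {0}
__version__ = {0}.get_versions()['version']
"""

# With versionfile_source = "mypkg/_version.py" the module name computed in
# do_setup() is "_version", so the text appended to mypkg/__init__.py is:
#
#   from . import _version
#   __version__ = _version.get_versions()['version']
print(INIT_PY_SNIPPET.format("_version"))
```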
- if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % versionfile_source) + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-time keyword - # substitution. - do_vcs_install(manifest_in, versionfile_source, ipy) - - -def get_cmdclass(): - cmds = {'version': cmd_version, - 'versioneer': cmd_update_files, - 'build': cmd_build, - 'sdist': cmd_sdist, - } - if 'cx_Freeze' in sys.modules: # cx_freeze enabled? - cmds['build_exe'] = cmd_build_exe - del cmds['build'] - - return cmds + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. + do_vcs_install(cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . 
This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +def setup_command(): + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + setup_command() From 36527f3d8a9b0b1d08fcc31a14577db98079971e Mon Sep 17 00:00:00 2001 From: Oscar Gustafsson Date: Mon, 20 Feb 2023 15:29:25 +0000 Subject: [PATCH 29/45] Update `inspector._get_commit()` for recent Versioneer --- numba/misc/help/inspector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numba/misc/help/inspector.py b/numba/misc/help/inspector.py index 67cfc44f2f8..284a62af653 100644 --- a/numba/misc/help/inspector.py +++ b/numba/misc/help/inspector.py @@ -17,7 +17,7 @@ def _get_commit(): - full = get_versions()['full'].split('.')[0] + full = get_versions()['full-revisionid'] if not full: warnings.warn( "Cannot find git commit hash. Source links could be inaccurate.", From 1e42a0bf843f7f5957f1141c521e016b76e02a9a Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Tue, 28 Feb 2023 13:46:51 -0500 Subject: [PATCH 30/45] Change LLVM version in documentation As titled. --- docs/source/user/installing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user/installing.rst b/docs/source/user/installing.rst index d2a88a52ee6..f45f575fae8 100644 --- a/docs/source/user/installing.rst +++ b/docs/source/user/installing.rst @@ -257,7 +257,7 @@ information. +----------++--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ | Numba | Release date | Python | NumPy | llvmlite | LLVM | TBB | +===========+==============+===========================+============================+==============================+===================+=============================+ -| 0.57.x | TBC | 3.8.x <= version < 3.12 | 1.19 <= version < 1.24 | 0.40.x | 11.x | 2021.x | +| 0.57.x | TBC | 3.8.x <= version < 3.12 | 1.19 <= version < 1.24 | 0.40.x | 11.x-14.x | 2021.x | +-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ | 0.56.4 | 2022-11-03 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.24 | 0.39.x | 11.x | 2021.x | +-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ From e4b546b3ed94a53a967d7775b378cb857311836d Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Thu, 2 Mar 2023 11:40:52 +0000 Subject: [PATCH 31/45] CUDA compare and swap with index --- numba/cuda/cudadecl.py | 21 ++++++ numba/cuda/cudaimpl.py | 43 ++++++++++--- numba/cuda/simulator/kernelapi.py | 8 +++ numba/cuda/stubs.py | 13 +++- numba/cuda/tests/cudapy/test_atomics.py | 85 +++++++++++++++++++++---- 5 files changed, 147 insertions(+), 23 deletions(-) diff --git a/numba/cuda/cudadecl.py b/numba/cuda/cudadecl.py index 1ddd8f64af5..88893e73f18 100644 --- a/numba/cuda/cudadecl.py +++ b/numba/cuda/cudadecl.py @@ -514,6 +514,24 @@ def generic(self, args, kws): return signature(dty, ary, dty, dty) +@register +class Cuda_atomic_cas(AbstractTemplate): + key = cuda.atomic.cas + + def generic(self, args, kws): + assert not kws + ary, idx, old, val = args + dty = ary.dtype + + if dty not in integer_numba_types: + 
return + + if ary.ndim == 1: + return signature(dty, ary, types.intp, dty, dty) + elif ary.ndim > 1: + return signature(dty, ary, idx, dty, dty) + + @register class Cuda_nanosleep(ConcreteTemplate): key = cuda.nanosleep @@ -602,6 +620,9 @@ def resolve_nanmax(self, mod): def resolve_compare_and_swap(self, mod): return types.Function(Cuda_atomic_compare_and_swap) + def resolve_cas(self, mod): + return types.Function(Cuda_atomic_cas) + @register_attr class CudaFp16Template(AttributeTemplate): diff --git a/numba/cuda/cudaimpl.py b/numba/cuda/cudaimpl.py index 1be33f5ecc1..5a81ea7eac8 100644 --- a/numba/cuda/cudaimpl.py +++ b/numba/cuda/cudaimpl.py @@ -708,7 +708,7 @@ def impl(context, builder, sig, args): lower(math.degrees, types.f8)(gen_deg_rad(_rad2deg)) -def _normalize_indices(context, builder, indty, inds): +def _normalize_indices(context, builder, indty, inds, aryty, valty): """ Convert integer indices into tuple of intp """ @@ -719,6 +719,15 @@ def _normalize_indices(context, builder, indty, inds): indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) indices = [context.cast(builder, i, t, types.intp) for t, i in zip(indty, indices)] + + dtype = aryty.dtype + if dtype != valty: + raise TypeError("expect %s but got %s" % (dtype, valty)) + + if aryty.ndim != len(indty): + raise TypeError("indexing %d-D array with %d-D index" % + (aryty.ndim, len(indty))) + return indty, indices @@ -729,14 +738,8 @@ def imp(context, builder, sig, args): ary, inds, val = args dtype = aryty.dtype - indty, indices = _normalize_indices(context, builder, indty, inds) - - if dtype != valty: - raise TypeError("expect %s but got %s" % (dtype, valty)) - - if aryty.ndim != len(indty): - raise TypeError("indexing %d-D array with %d-D index" % - (aryty.ndim, len(indty))) + indty, indices = _normalize_indices(context, builder, indty, inds, + aryty, valty) lary = context.make_array(aryty)(context, builder, ary) ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices, @@ -935,6 +938,28 @@ def ptx_atomic_cas_tuple(context, builder, sig, args): 'with %s array' % dtype) +@lower(stubs.atomic.cas, types.Array, types.intp, types.Any, types.Any) +@lower(stubs.atomic.cas, types.Array, types.Tuple, types.Any, types.Any) +@lower(stubs.atomic.cas, types.Array, types.UniTuple, types.Any, types.Any) +def ptx_atomic_cas(context, builder, sig, args): + aryty, indty, oldty, valty = sig.args + ary, inds, old, val = args + + indty, indices = _normalize_indices(context, builder, indty, inds, aryty, + valty) + + lary = context.make_array(aryty)(context, builder, ary) + ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices, + wraparound=True) + + if aryty.dtype in (cuda.cudadecl.integer_numba_types): + lmod = builder.module + bitwidth = aryty.dtype.bitwidth + return nvvmutils.atomic_cmpxchg(builder, lmod, bitwidth, ptr, old, val) + else: + raise TypeError('Unimplemented atomic cas with %s array' % aryty.dtype) + + # ----------------------------------------------------------------------------- @lower(stubs.nanosleep, types.uint32) diff --git a/numba/cuda/simulator/kernelapi.py b/numba/cuda/simulator/kernelapi.py index 91c55aa2e17..2d8bc4ae647 100644 --- a/numba/cuda/simulator/kernelapi.py +++ b/numba/cuda/simulator/kernelapi.py @@ -129,6 +129,7 @@ def array(self, shape, dtype): maxlock = threading.Lock() minlock = threading.Lock() caslock = threading.Lock() +casindexlock = threading.Lock() inclock = threading.Lock() declock = threading.Lock() exchlock = threading.Lock() @@ -221,6 +222,13 @@ def 
compare_and_swap(self, array, old, val): array[index] = val return loaded + def cas(self, array, index, old, val): + with casindexlock: + loaded = array[index] + if loaded == old: + array[index] = val + return loaded + class FakeCUDAFp16(object): def hadd(self, a, b): diff --git a/numba/cuda/stubs.py b/numba/cuda/stubs.py index 8575efd030d..9056d88c4d0 100644 --- a/numba/cuda/stubs.py +++ b/numba/cuda/stubs.py @@ -503,7 +503,18 @@ class compare_and_swap(Stub): Supported on int32, int64, uint32, uint64 operands only. - Returns the current value as if it is loaded atomically. + Returns the old value as if it is loaded atomically. + """ + + class cas(Stub): + """cas(ary, idx, old, val) + + Conditionally assign ``val`` to the element ary[idx] of an array + ``ary`` if the current value of ary[idx] matches ``old``. + + Supported on int32, int64, uint32, uint64 operands only. + + Returns the old value as if it is loaded atomically. """ diff --git a/numba/cuda/tests/cudapy/test_atomics.py b/numba/cuda/tests/cudapy/test_atomics.py index 81a223b40e0..5b1ee0465ef 100644 --- a/numba/cuda/tests/cudapy/test_atomics.py +++ b/numba/cuda/tests/cudapy/test_atomics.py @@ -456,6 +456,20 @@ def atomic_compare_and_swap(res, old, ary, fill_val): old[gid] = out +def atomic_cas_1dim(res, old, ary, fill_val): + gid = cuda.grid(1) + if gid < res.size: + out = cuda.atomic.cas(res, gid, fill_val, ary[gid]) + old[gid] = out + + +def atomic_cas_2dim(res, old, ary, fill_val): + gid = cuda.grid(2) + if gid[0] < res.shape[0] and gid[1] < res.shape[1]: + out = cuda.atomic.cas(res, gid, fill_val, ary[gid]) + old[gid] = out + + class TestCudaAtomics(CUDATestCase): def setUp(self): super().setUp() @@ -1251,12 +1265,14 @@ def test_atomic_min_double_shared(self): gold = np.min(vals) np.testing.assert_equal(res, gold) - def check_compare_and_swap(self, n, fill, unfill, dtype): + def check_cas(self, n, fill, unfill, dtype, cas_func, ndim=1): res = [fill] * (n // 2) + [unfill] * (n // 2) np.random.shuffle(res) res = np.asarray(res, dtype=dtype) + if ndim == 2: + res.shape = (10, -1) out = np.zeros_like(res) - ary = np.random.randint(1, 10, size=res.size).astype(res.dtype) + ary = np.random.randint(1, 10, size=res.shape).astype(res.dtype) fill_mask = res == fill unfill_mask = res == unfill @@ -1265,33 +1281,76 @@ def check_compare_and_swap(self, n, fill, unfill, dtype): expect_res[fill_mask] = ary[fill_mask] expect_res[unfill_mask] = unfill - expect_out = np.zeros_like(out) - expect_out[fill_mask] = res[fill_mask] - expect_out[unfill_mask] = unfill + expect_out = res.copy() - cuda_func = cuda.jit(atomic_compare_and_swap) - cuda_func[10, 10](res, out, ary, fill) + cuda_func = cuda.jit(cas_func) + if ndim == 1: + cuda_func[10, 10](res, out, ary, fill) + else: + cuda_func[(10, 10), (10, 10)](res, out, ary, fill) np.testing.assert_array_equal(expect_res, res) np.testing.assert_array_equal(expect_out, out) def test_atomic_compare_and_swap(self): - self.check_compare_and_swap(n=100, fill=-99, unfill=-1, dtype=np.int32) + self.check_cas(n=100, fill=-99, unfill=-1, dtype=np.int32, + cas_func=atomic_compare_and_swap) def test_atomic_compare_and_swap2(self): - self.check_compare_and_swap(n=100, fill=-45, unfill=-1, dtype=np.int64) + self.check_cas(n=100, fill=-45, unfill=-1, dtype=np.int64, + cas_func=atomic_compare_and_swap) def test_atomic_compare_and_swap3(self): rfill = np.random.randint(50, 500, dtype=np.uint32) runfill = np.random.randint(1, 25, dtype=np.uint32) - self.check_compare_and_swap(n=100, fill=rfill, unfill=runfill, - 
dtype=np.uint32) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint32, + cas_func=atomic_compare_and_swap) def test_atomic_compare_and_swap4(self): rfill = np.random.randint(50, 500, dtype=np.uint64) runfill = np.random.randint(1, 25, dtype=np.uint64) - self.check_compare_and_swap(n=100, fill=rfill, unfill=runfill, - dtype=np.uint64) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint64, + cas_func=atomic_compare_and_swap) + + def test_atomic_cas_1dim(self): + self.check_cas(n=100, fill=-99, unfill=-1, dtype=np.int32, + cas_func=atomic_cas_1dim) + + def test_atomic_cas_2dim(self): + self.check_cas(n=100, fill=-99, unfill=-1, dtype=np.int32, + cas_func=atomic_cas_2dim, ndim=2) + + def test_atomic_cas2_1dim(self): + self.check_cas(n=100, fill=-45, unfill=-1, dtype=np.int64, + cas_func=atomic_cas_1dim) + + def test_atomic_cas2_2dim(self): + self.check_cas(n=100, fill=-45, unfill=-1, dtype=np.int64, + cas_func=atomic_cas_2dim, ndim=2) + + def test_atomic_cas3_1dim(self): + rfill = np.random.randint(50, 500, dtype=np.uint32) + runfill = np.random.randint(1, 25, dtype=np.uint32) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint32, + cas_func=atomic_cas_1dim) + + def test_atomic_cas3_2dim(self): + rfill = np.random.randint(50, 500, dtype=np.uint32) + runfill = np.random.randint(1, 25, dtype=np.uint32) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint32, + cas_func=atomic_cas_2dim, ndim=2) + + def test_atomic_cas4_1dim(self): + rfill = np.random.randint(50, 500, dtype=np.uint64) + runfill = np.random.randint(1, 25, dtype=np.uint64) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint64, + cas_func=atomic_cas_1dim) + + def test_atomic_cas4_2dim(self): + rfill = np.random.randint(50, 500, dtype=np.uint64) + runfill = np.random.randint(1, 25, dtype=np.uint64) + self.check_cas(n=100, fill=rfill, unfill=runfill, dtype=np.uint64, + cas_func=atomic_cas_2dim, ndim=2) # Tests that the atomic add, min, and max operations return the old value - # in the simulator, they did not (see Issue #5458). The max and min have From 5faf0eada0e57a2d284e44ed2742f75baecc6d09 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Mon, 6 Mar 2023 11:48:00 +0000 Subject: [PATCH 32/45] Add pending-deprecation warnings for ``numba.pycc`` This patch: * Adds code that raises ``NumbaPendingDeprecationWarning``s at the point of initialisation for ``numba.pycc``. * Adds tests for the above warning. * Updates the deprecation notices in the documentation to reflect the above. * Updates the ``pycc`` module documentation with a note stating it is pending deprecation. * Fixes up the testing infrastructure to avoid raising the above warning on test discovery or use of the ``numba.tests.support`` module. xref: #8509 --- docs/source/reference/aot-compilation.rst | 3 ++ docs/source/reference/deprecation.rst | 65 +++++++++++++++++++++++ docs/source/user/pycc.rst | 3 ++ numba/pycc/__init__.py | 18 +++++++ numba/tests/support.py | 4 +- numba/tests/test_deprecations.py | 24 +++++++++ numba/tests/test_pycc.py | 15 ++++-- 7 files changed, 127 insertions(+), 5 deletions(-) diff --git a/docs/source/reference/aot-compilation.rst b/docs/source/reference/aot-compilation.rst index a6d56ee3433..28376f3e583 100644 --- a/docs/source/reference/aot-compilation.rst +++ b/docs/source/reference/aot-compilation.rst @@ -3,6 +3,9 @@ Ahead-of-Time compilation ========================= +.. note:: This module is pending deprecation. 
Please see + :ref:`deprecation-numba-pycc`for more information. + .. currentmodule:: numba.pycc .. class:: CC(extension_name, source_module=None) diff --git a/docs/source/reference/deprecation.rst b/docs/source/reference/deprecation.rst index d3f0f73469f..a14b98a3368 100644 --- a/docs/source/reference/deprecation.rst +++ b/docs/source/reference/deprecation.rst @@ -387,6 +387,71 @@ for example:: print(foo("a string")) +.. _deprecation-numba-pycc: + +Deprecation of the ``numba.pycc`` module +======================================== +Numba has supported some degree of Ahead-of-Time (AOT) compilation through the +use of the tools in the ``numba.pycc`` module. This capability is very important +to the Numba project and following an assessment of the viability of the current +approach, it was decided to deprecate it in favour of developing new technology +to better meet current needs. + +Reason for deprecation +---------------------- + +There are a number of reasons for this deprecation. + +* ``numba.pycc`` tools create C-Extensions that have symbols that are only + usable from the Python interpreter, they are not compatible with calls made + from within code compiled using Numba's JIT compiler. This drastically reduces + the utility of AOT compiled functions. +* ``numba.pycc`` has some reliance on ``setuptools`` (and ``distutils``) which + is something Numba is trying to reduce, particularly due to the upcoming + removal of ``distutils`` in Python 3.12. +* The ``numba.pycc`` compilation chain is very limited in terms of its feature + set in comparison to Numba's JIT compiler, it also has numerous technical + issues to do with declaring and linking both internal and external libraries. +* The number of users of ``numba.pycc`` is assumed to be quite small, this was + indicated through discussions at a Numba public meeting on 2022-10-04 and + issue #8509. +* The Numba project is working on new innovations in the AOT compiler space and + the maintainers consider it a better use of resources to develop these than + maintain and develop ``numba.pycc``. + +Example(s) of the impact +------------------------ + +Any source code using ``numba.pycc`` would fail to work once the functionality +has been removed. + +Schedule +-------- + +This feature will be removed with respect to this schedule: + +* Pending-deprecation warnings will be issued in version 0.57.0. +* Deprecation warnings will be issued once a replacement is developed. +* Deprecation warnings will be given for a minimum of two releases prior to full + removal. + +Recommendations +--------------- + +Projects that need/rely on the deprecated behaviour should pin their dependency +on Numba to a version prior to removal of this behaviour, or consider following +replacement instructions below that outline how to adjust to the change. + +Replacement +----------- + +A replacement for this functionality is being developed as part of the Numba +2023 development focus. The ``numba.pycc`` module will not be removed until this +replacement functionality is able to provide similar utility and offer an +upgrade path. At the point of the new technology being deemed suitable, +replacement instructions will be issued. 
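For reference, the workflow affected by this deprecation follows the pattern
sketched below. This is a minimal illustration based on the ``CC`` API shown
in the patched documentation; the module name ``my_module`` and the
``f8(f8)`` signature are placeholders, not anything prescribed by this patch:

```
# Minimal sketch of the pending-deprecation numba.pycc AOT workflow.
from numba.pycc import CC   # importing numba.pycc now warns (see below)

cc = CC('my_module')

@cc.export('square', 'f8(f8)')
def square(x):
    return x * x

if __name__ == '__main__':
    cc.compile()  # builds the C extension ahead of time
```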
+ + Deprecation of eager compilation of CUDA device functions ========================================================= diff --git a/docs/source/user/pycc.rst b/docs/source/user/pycc.rst index 50eba7f4a10..7ef1c488eeb 100644 --- a/docs/source/user/pycc.rst +++ b/docs/source/user/pycc.rst @@ -8,6 +8,9 @@ Compiling code ahead of time While Numba's main use case is :term:`Just-in-Time compilation`, it also provides a facility for :term:`Ahead-of-Time compilation` (AOT). +.. note:: This module is pending deprecation. Please see + :ref:`deprecation-numba-pycc`for more information. + Overview ======== diff --git a/numba/pycc/__init__.py b/numba/pycc/__init__.py index 122d062f1d2..28ce874a0d4 100644 --- a/numba/pycc/__init__.py +++ b/numba/pycc/__init__.py @@ -1,5 +1,23 @@ # -*- coding: utf-8 -*- +import warnings +from numba.core.errors import NumbaPendingDeprecationWarning + # Public API from .cc import CC from .decorators import export, exportmany + +# If use of anything is attempted through the `pycc` import path this warning +# will be shown. +__pycc_deprecation_doc_url = ("https://numba.readthedocs.io/en/stable/" + "reference/deprecation.html" + "#deprecation-of-the-numba-pycc-module") +__pycc_pending_deprecation_message = ("The 'pycc' module is pending " + "deprecation. Replacement technology is " + "being developed.\n\n" + "Pending Deprecation in Numba 0.57.0. " + "For more information please see: " + f"{__pycc_deprecation_doc_url}") + +_pend_dep = NumbaPendingDeprecationWarning(__pycc_pending_deprecation_message) +warnings.warn(_pend_dep, stacklevel=2) diff --git a/numba/tests/support.py b/numba/tests/support.py index 79e22daf0ac..42e45fe0a99 100644 --- a/numba/tests/support.py +++ b/numba/tests/support.py @@ -50,7 +50,6 @@ models, ) from numba.core.datamodel.models import OpaqueModel -from numba.pycc.platform import external_compiler_works try: import scipy @@ -662,6 +661,9 @@ def skip_if_no_external_compiler(self): decorator so as to make it "lazy" via runtime evaluation opposed to running at test-discovery time. """ + # This is a local import to avoid deprecation warnings being generated + # through the use of the numba.pycc module. 
+ from numba.pycc.platform import external_compiler_works if not external_compiler_works(): self.skipTest("No suitable external compiler was found.") diff --git a/numba/tests/test_deprecations.py b/numba/tests/test_deprecations.py index dc8a3600810..e5bb4195f3d 100644 --- a/numba/tests/test_deprecations.py +++ b/numba/tests/test_deprecations.py @@ -136,6 +136,30 @@ def foo(): self.check_warning(w, "numba.generated_jit is deprecated", NumbaDeprecationWarning) + @TestCase.run_test_in_subprocess + def test_pycc_module(self): + # checks import of module warns + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", + category=NumbaPendingDeprecationWarning) + import numba.pycc + + expected_str = ("The 'pycc' module is pending deprecation.") + self.check_warning(w, expected_str, NumbaPendingDeprecationWarning) + + @TestCase.run_test_in_subprocess + def test_pycc_CC(self): + # check the most commonly used functionality (CC) warns + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", + category=NumbaPendingDeprecationWarning) + from numba.pycc import CC + + expected_str = ("The 'pycc' module is pending deprecation.") + self.check_warning(w, expected_str, NumbaPendingDeprecationWarning) + if __name__ == '__main__': unittest.main() diff --git a/numba/tests/test_pycc.py b/numba/tests/test_pycc.py index a116b68b513..f2b0a6d8e9d 100644 --- a/numba/tests/test_pycc.py +++ b/numba/tests/test_pycc.py @@ -13,10 +13,6 @@ import llvmlite.binding as ll from numba.core import utils -from numba.pycc.decorators import clear_export_registry -from numba.pycc.platform import find_shared_ending, find_pyext_ending -from numba.pycc.platform import external_compiler_works - from numba.tests.support import (TestCase, tag, import_dynamic, temp_directory, has_blas) import unittest @@ -50,6 +46,10 @@ def test_windows_compiler_validity(self): # When inside conda-build VSINSTALLDIR should be set and windows should # have a valid compiler available, `external_compiler_works()` should # agree with this. If this is not the case then error out to alert devs. + + # This is a local import to avoid deprecation warnings being generated + # through the use of the numba.pycc module. + from numba.pycc.platform import external_compiler_works is_running_conda_build = os.environ.get('CONDA_BUILD', None) is not None if is_running_conda_build: if os.environ.get('VSINSTALLDIR', None) is not None: @@ -71,6 +71,10 @@ def tearDown(self): # Since we're executing the module-under-test several times # from the same process, we must clear the exports registry # between invocations. + + # This is a local import to avoid deprecation warnings being generated + # through the use of the numba.pycc module. + from numba.pycc.decorators import clear_export_registry clear_export_registry() @contextlib.contextmanager @@ -136,6 +140,9 @@ def test_cc_properties(self): self.assertTrue(os.path.basename(f).startswith('pycc_test_simple.'), f) if sys.platform.startswith('linux'): self.assertTrue(f.endswith('.so'), f) + # This is a local import to avoid deprecation warnings being + # generated through the use of the numba.pycc module. + from numba.pycc.platform import find_pyext_ending self.assertIn(find_pyext_ending(), f) def test_compile(self): From e5e84faf7e92f2864d15e6e3464367b62fd14705 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Mon, 6 Mar 2023 12:23:55 +0000 Subject: [PATCH 33/45] Fix flake8. Deliberate unused import in tests. As title. 
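The behaviour these tests exercise can also be checked standalone; a sketch
(run in a fresh interpreter, since the warning is raised once when
``numba.pycc`` initialises):

```
import warnings
from numba.core.errors import NumbaPendingDeprecationWarning

# Record the pending-deprecation warning emitted at numba.pycc import time.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", category=NumbaPendingDeprecationWarning)
    import numba.pycc  # noqa: F401

for w in caught:
    if issubclass(w.category, NumbaPendingDeprecationWarning):
        print(w.message)  # "The 'pycc' module is pending deprecation. ..."
```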
--- numba/tests/test_deprecations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numba/tests/test_deprecations.py b/numba/tests/test_deprecations.py index e5bb4195f3d..fafdc846836 100644 --- a/numba/tests/test_deprecations.py +++ b/numba/tests/test_deprecations.py @@ -143,7 +143,7 @@ def test_pycc_module(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", category=NumbaPendingDeprecationWarning) - import numba.pycc + import numba.pycc # noqa: F401 expected_str = ("The 'pycc' module is pending deprecation.") self.check_warning(w, expected_str, NumbaPendingDeprecationWarning) @@ -155,7 +155,7 @@ def test_pycc_CC(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", category=NumbaPendingDeprecationWarning) - from numba.pycc import CC + from numba.pycc import CC # noqa: F401 expected_str = ("The 'pycc' module is pending deprecation.") self.check_warning(w, expected_str, NumbaPendingDeprecationWarning) From 9f1f8b865c181ad656ccbba5154b3b5ad01684b9 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Mon, 6 Mar 2023 12:51:13 +0000 Subject: [PATCH 34/45] Fix syntax error in docs. As title. --- docs/source/reference/aot-compilation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/reference/aot-compilation.rst b/docs/source/reference/aot-compilation.rst index 28376f3e583..0f55d0138ba 100644 --- a/docs/source/reference/aot-compilation.rst +++ b/docs/source/reference/aot-compilation.rst @@ -4,7 +4,7 @@ Ahead-of-Time compilation ========================= .. note:: This module is pending deprecation. Please see - :ref:`deprecation-numba-pycc`for more information. + :ref:`deprecation-numba-pycc` for more information. .. currentmodule:: numba.pycc From ad4f6234830963aed32e88b7172d5535ab601a66 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Mon, 6 Mar 2023 17:39:29 +0000 Subject: [PATCH 35/45] Updates following review --- docs/source/cuda-reference/kernel.rst | 29 +++++++++++++++++-------- numba/cuda/cudaimpl.py | 20 ++++------------- numba/cuda/simulator/kernelapi.py | 6 ++--- numba/cuda/stubs.py | 4 ++-- numba/cuda/tests/cudapy/test_atomics.py | 9 +++----- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/docs/source/cuda-reference/kernel.rst b/docs/source/cuda-reference/kernel.rst index cccfc13222b..9877c93f2fb 100644 --- a/docs/source/cuda-reference/kernel.rst +++ b/docs/source/cuda-reference/kernel.rst @@ -175,7 +175,7 @@ Synchronization and Atomic Operations indices for indexing into multiple dimensional arrays. The number of element in ``idx`` must match the number of dimension of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.sub(array, idx, value) @@ -185,7 +185,7 @@ Synchronization and Atomic Operations indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.and_(array, idx, value) @@ -195,7 +195,7 @@ Synchronization and Atomic Operations integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. 
- Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.or_(array, idx, value) @@ -205,7 +205,7 @@ Synchronization and Atomic Operations integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.xor(array, idx, value) @@ -215,7 +215,7 @@ Synchronization and Atomic Operations integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.exch(array, idx, value) @@ -225,7 +225,7 @@ Synchronization and Atomic Operations integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.inc(array, idx, value) @@ -236,7 +236,7 @@ Synchronization and Atomic Operations The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.dec(array, idx, value) @@ -248,7 +248,7 @@ Synchronization and Atomic Operations The number of elements in ``idx`` must match the number of dimensions of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.max(array, idx, value) @@ -259,9 +259,20 @@ Synchronization and Atomic Operations The number of element in ``idx`` must match the number of dimension of ``array``. - Returns the value of ``array[idx]`` before the storing the new value. + Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. +.. function:: numba.cuda.atomic.cas(array, idx, old, value) + + Perform ``if array[idx] == old: array[idx] = value``. Supports int32, + int64, uint32, uint64 indexes only. The ``idx`` argument can be an integer + or a tuple of integer indices for indexing into multi-dimensional arrays. + The number of elements in ``idx`` must match the number of dimensions of + ``array``. + + Returns the value of ``array[idx]`` before storing the new value. + Behaves like an atomic compare and swap. + .. 
function:: numba.cuda.syncthreads diff --git a/numba/cuda/cudaimpl.py b/numba/cuda/cudaimpl.py index 5a81ea7eac8..72ed22f5325 100644 --- a/numba/cuda/cudaimpl.py +++ b/numba/cuda/cudaimpl.py @@ -920,22 +920,10 @@ def ptx_atomic_nanmin(context, builder, dtype, ptr, val): @lower(stubs.atomic.compare_and_swap, types.Array, types.Any, types.Any) -def ptx_atomic_cas_tuple(context, builder, sig, args): - aryty, oldty, valty = sig.args - ary, old, val = args - dtype = aryty.dtype - - lary = context.make_array(aryty)(context, builder, ary) - zero = context.get_constant(types.intp, 0) - ptr = cgutils.get_item_pointer(context, builder, aryty, lary, (zero,)) - - if aryty.dtype in (cuda.cudadecl.integer_numba_types): - lmod = builder.module - bitwidth = aryty.dtype.bitwidth - return nvvmutils.atomic_cmpxchg(builder, lmod, bitwidth, ptr, old, val) - else: - raise TypeError('Unimplemented atomic compare_and_swap ' - 'with %s array' % dtype) +def ptx_atomic_compare_and_swap(context, builder, sig, args): + sig = sig.return_type(sig.args[0], types.intp, sig.args[1], sig.args[2]) + args = (args[0], context.get_constant(types.intp, 0), args[1], args[2]) + return ptx_atomic_cas(context, builder, sig, args) @lower(stubs.atomic.cas, types.Array, types.intp, types.Any, types.Any) diff --git a/numba/cuda/simulator/kernelapi.py b/numba/cuda/simulator/kernelapi.py index 2d8bc4ae647..64793df054c 100644 --- a/numba/cuda/simulator/kernelapi.py +++ b/numba/cuda/simulator/kernelapi.py @@ -128,8 +128,8 @@ def array(self, shape, dtype): xorlock = threading.Lock() maxlock = threading.Lock() minlock = threading.Lock() +compare_and_swaplock = threading.Lock() caslock = threading.Lock() -casindexlock = threading.Lock() inclock = threading.Lock() declock = threading.Lock() exchlock = threading.Lock() @@ -215,7 +215,7 @@ def nanmin(self, array, index, val): return old def compare_and_swap(self, array, old, val): - with caslock: + with compare_and_swaplock: index = (0,) * array.ndim loaded = array[index] if loaded == old: @@ -223,7 +223,7 @@ def compare_and_swap(self, array, old, val): return loaded def cas(self, array, index, old, val): - with casindexlock: + with caslock: loaded = array[index] if loaded == old: array[index] = val diff --git a/numba/cuda/stubs.py b/numba/cuda/stubs.py index 9056d88c4d0..a3626680045 100644 --- a/numba/cuda/stubs.py +++ b/numba/cuda/stubs.py @@ -509,8 +509,8 @@ class compare_and_swap(Stub): class cas(Stub): """cas(ary, idx, old, val) - Conditionally assign ``val`` to the element ary[idx] of an array - ``ary`` if the current value of ary[idx] matches ``old``. + Conditionally assign ``val`` to the element ``idx`` of an array + ``ary`` if the current value of ``ary[idx]`` matches ``old``. Supported on int32, int64, uint32, uint64 operands only. 
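The semantics of the new ``cas`` stub can be illustrated with a short kernel;
a hedged sketch (array size, dtype and launch configuration are arbitrary
choices for illustration):

```
import numpy as np
from numba import cuda

@cuda.jit
def claim_slots(flags, old_vals):
    i = cuda.grid(1)
    if i < flags.size:
        # Atomically: if flags[i] == 0 then flags[i] = 1. The return value
        # is flags[i] as loaded before the swap, so a returned 0 means this
        # thread's swap took effect.
        old_vals[i] = cuda.atomic.cas(flags, i, 0, 1)

flags = np.zeros(64, dtype=np.int32)
old_vals = np.empty_like(flags)
claim_slots[1, 64](flags, old_vals)
assert (flags == 1).all() and (old_vals == 0).all()
```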
diff --git a/numba/cuda/tests/cudapy/test_atomics.py b/numba/cuda/tests/cudapy/test_atomics.py index 5b1ee0465ef..ad70857d863 100644 --- a/numba/cuda/tests/cudapy/test_atomics.py +++ b/numba/cuda/tests/cudapy/test_atomics.py @@ -452,22 +452,19 @@ def atomic_double_shared(res, ary): def atomic_compare_and_swap(res, old, ary, fill_val): gid = cuda.grid(1) if gid < res.size: - out = cuda.atomic.compare_and_swap(res[gid:], fill_val, ary[gid]) - old[gid] = out + old[gid] = cuda.atomic.compare_and_swap(res[gid:], fill_val, ary[gid]) def atomic_cas_1dim(res, old, ary, fill_val): gid = cuda.grid(1) if gid < res.size: - out = cuda.atomic.cas(res, gid, fill_val, ary[gid]) - old[gid] = out + old[gid] = cuda.atomic.cas(res, gid, fill_val, ary[gid]) def atomic_cas_2dim(res, old, ary, fill_val): gid = cuda.grid(2) if gid[0] < res.shape[0] and gid[1] < res.shape[1]: - out = cuda.atomic.cas(res, gid, fill_val, ary[gid]) - old[gid] = out + old[gid] = cuda.atomic.cas(res, gid, fill_val, ary[gid]) class TestCudaAtomics(CUDATestCase): From a109b002f80b513369ae9c4a9c7e5e48eaebb69f Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 8 Mar 2023 11:13:37 +0000 Subject: [PATCH 36/45] CUDA: Add 12.0 and 12.1 supported compute capabilities to nvvm.py --- numba/cuda/cudadrv/nvvm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numba/cuda/cudadrv/nvvm.py b/numba/cuda/cudadrv/nvvm.py index ddfd759350f..64d49fd9e7e 100644 --- a/numba/cuda/cudadrv/nvvm.py +++ b/numba/cuda/cudadrv/nvvm.py @@ -368,6 +368,8 @@ def get_log(self): (11, 6): ((3, 5), (8, 7)), (11, 7): ((3, 5), (8, 7)), (11, 8): ((3, 5), (9, 0)), + (12, 0): ((5, 0), (9, 0)), + (12, 1): ((5, 0), (9, 0)), } From bd92dd2cf78e6b53b9a2d66999df879922f1da60 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 8 Mar 2023 11:14:45 +0000 Subject: [PATCH 37/45] Document that MVC is not supported on CUDA 12 yet --- docs/source/cuda/minor_version_compatibility.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/cuda/minor_version_compatibility.rst b/docs/source/cuda/minor_version_compatibility.rst index b3888f0d042..eb52f1e9892 100644 --- a/docs/source/cuda/minor_version_compatibility.rst +++ b/docs/source/cuda/minor_version_compatibility.rst @@ -17,6 +17,8 @@ Numba supports MVC for CUDA 11 on Linux using the external ``cubinlinker`` and - Cooperative Groups are unsupported, because they require an archive to be linked. +MVC is not yet supported on CUDA 12 - this will be added in a future release of Numba. + MVC is not supported on Windows. From 1d1609ac94f2faf8e331f20d5a95427e9e5ef618 Mon Sep 17 00:00:00 2001 From: Graham Markall <535640+gmarkall@users.noreply.github.com> Date: Wed, 8 Mar 2023 11:33:58 +0000 Subject: [PATCH 38/45] Line-wrap docs/source/cuda/minor_version_compatibility.rst PR #7255 feedback. Co-authored-by: stuartarchibald --- docs/source/cuda/minor_version_compatibility.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/cuda/minor_version_compatibility.rst b/docs/source/cuda/minor_version_compatibility.rst index eb52f1e9892..f09e91751b6 100644 --- a/docs/source/cuda/minor_version_compatibility.rst +++ b/docs/source/cuda/minor_version_compatibility.rst @@ -17,7 +17,8 @@ Numba supports MVC for CUDA 11 on Linux using the external ``cubinlinker`` and - Cooperative Groups are unsupported, because they require an archive to be linked. -MVC is not yet supported on CUDA 12 - this will be added in a future release of Numba. 
+MVC is not yet supported on CUDA 12 - this will be added in a future release of +Numba. MVC is not supported on Windows. From bb9b0d9f776f20fdca9000da4355556168e11aed Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 8 Mar 2023 12:47:45 +0000 Subject: [PATCH 39/45] Revise lineinfo tests based on PR #8594 feedback - Make the regex looking for device functions expect at least one space between `.weak` and `.func`. - Correct a typo. - Delete an obsolete comment. - Ignore internal warnings when catching warnings. --- numba/cuda/tests/cudapy/test_lineinfo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/numba/cuda/tests/cudapy/test_lineinfo.py b/numba/cuda/tests/cudapy/test_lineinfo.py index 6a24458bffc..afc209b86ca 100644 --- a/numba/cuda/tests/cudapy/test_lineinfo.py +++ b/numba/cuda/tests/cudapy/test_lineinfo.py @@ -2,6 +2,7 @@ from numba.core.errors import NumbaInvalidConfigWarning from numba.cuda.testing import CUDATestCase, skip_on_cudasim from numba.cuda.cudadrv.nvvm import NVVM +from numba.tests.support import ignore_internal_warnings import re import unittest import warnings @@ -144,13 +145,13 @@ def caller(x): # Check that there is no device function in the PTX # A line beginning with ".weak .func" that identifies a device function - devfn_start = re.compile(r'^\.weak\s*\.func') + devfn_start = re.compile(r'^\.weak\s+\.func') for line in ptxlines: if devfn_start.match(line) is not None: self.fail(f"Found device function in PTX:\n\n{ptx}") - # Scan for .loc directives taht refer to an inlined device function + # Scan for .loc directives that refer to an inlined device function loc_directive = self._loc_directive_regex() found = False @@ -162,9 +163,6 @@ def caller(x): break if not found: - # Join one line either side so the function as a whole is shown, - # i.e. including the declaration and parameter list, and the - # closing brace. self.fail(f'No .loc directive with inlined_at info found' f'in:\n\n{ptx}') @@ -190,6 +188,8 @@ def caller(x): def test_debug_and_lineinfo_warning(self): with warnings.catch_warnings(record=True) as w: + ignore_internal_warnings() + # We pass opt=False to prevent the warning about opt and debug # occurring as well @cuda.jit(debug=True, lineinfo=True, opt=False) From 2f104eafd05088812f95536fb34c8d11005604b0 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Wed, 8 Mar 2023 13:12:55 +0000 Subject: [PATCH 40/45] Fix docs build As title. Fixing syntax error. --- docs/source/user/pycc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user/pycc.rst b/docs/source/user/pycc.rst index 7ef1c488eeb..7293872f8db 100644 --- a/docs/source/user/pycc.rst +++ b/docs/source/user/pycc.rst @@ -9,7 +9,7 @@ While Numba's main use case is :term:`Just-in-Time compilation`, it also provides a facility for :term:`Ahead-of-Time compilation` (AOT). .. note:: This module is pending deprecation. Please see - :ref:`deprecation-numba-pycc`for more information. + :ref:`deprecation-numba-pycc` for more information. Overview From e09828bbffdba8732c4155490387fd8c2712fbba Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Thu, 9 Mar 2023 20:00:07 -0500 Subject: [PATCH 41/45] Allow LLVM 14 for Linux AArch64 compatibility As titled. 
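The constraint being relaxed here is the minimum-LLVM gate checked when Numba
is imported. A standalone sketch of that pattern (the error text is
illustrative, not Numba's exact message):

```
import llvmlite.binding as ll

_min_llvm_version = (11, 0, 0)

def ensure_llvm(min_version=_min_llvm_version):
    # llvm_version_info is llvmlite's (major, minor, patch) tuple for the
    # LLVM it was built against.
    if ll.llvm_version_info < min_version:
        found = '.'.join(map(str, ll.llvm_version_info))
        needed = '.'.join(map(str, min_version))
        raise ImportError(f"LLVM >= {needed} required, found {found}")

ensure_llvm()
```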
--- numba/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numba/__init__.py b/numba/__init__.py index 3bd14fea801..691e08c4b0e 100644 --- a/numba/__init__.py +++ b/numba/__init__.py @@ -143,7 +143,7 @@ def test(argv, **kwds): _min_llvmlite_version = (0, 40, 0) -_min_llvm_version = (14, 0, 0) +_min_llvm_version = (11, 0, 0) def _ensure_llvm(): """ From bcf35f224ffa99257b728999993aa2be52724a8d Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Mon, 13 Mar 2023 14:48:44 -0400 Subject: [PATCH 42/45] Remove LLVM14 build scaffolding As titled. --- buildscripts/condarecipe.local/meta.yaml | 2 +- buildscripts/incremental/setup_conda_environment.cmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/buildscripts/condarecipe.local/meta.yaml b/buildscripts/condarecipe.local/meta.yaml index 807e7d60f14..6f4bf96a5da 100644 --- a/buildscripts/condarecipe.local/meta.yaml +++ b/buildscripts/condarecipe.local/meta.yaml @@ -34,7 +34,7 @@ requirements: - setuptools - importlib_metadata # [py<39] # On channel https://anaconda.org/numba/ - - llvmlite >=0.40.0dev0llvm14,<0.40 + - llvmlite >=0.40,<0.40 # TBB devel version is to match TBB libs. # 2020.3 is the last version with the "old" ABI # NOTE: 2021.1..2021.5 are API compatible for Numba's purposes. diff --git a/buildscripts/incremental/setup_conda_environment.cmd b/buildscripts/incremental/setup_conda_environment.cmd index 8d000a7247c..c6062a1b014 100644 --- a/buildscripts/incremental/setup_conda_environment.cmd +++ b/buildscripts/incremental/setup_conda_environment.cmd @@ -29,7 +29,7 @@ conda create -n %CONDA_ENV% -q -y python=%PYTHON% %NUMPY_CHANNEL_PKG%=%NUMPY% cf call activate %CONDA_ENV% @rem Install latest llvmlite build -%CONDA_INSTALL% -c numba/label/dev "llvmlite=0.40.0dev0llvm14*" +%CONDA_INSTALL% -c numba/label/dev llvmlite=0.40 @rem Install required backports for older Pythons if %PYTHON% LSS 3.9 (%CONDA_INSTALL% importlib_metadata) @rem Install dependencies for building the documentation From 7dd0ab6f2b815834b7247fbcf5e8a8f490811b42 Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Mon, 13 Mar 2023 14:49:01 -0400 Subject: [PATCH 43/45] Add documentation for LLVM 14 passes As titled. --- numba/core/codegen.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numba/core/codegen.py b/numba/core/codegen.py index 2d694901780..ced9e87aac7 100644 --- a/numba/core/codegen.py +++ b/numba/core/codegen.py @@ -1225,6 +1225,8 @@ def _module_pass_manager(self, **kwargs): pm.add_licm_pass() pm.add_cfg_simplification_pass() else: + # These passes are required to get SVML to vectorize tests + # properly on LLVM 14 pm.add_instruction_combining_pass() pm.add_jump_threading_pass() From 03cf85955a88096ae9dfa0660d44268be23b93ac Mon Sep 17 00:00:00 2001 From: Andre Masella Date: Mon, 13 Mar 2023 14:52:37 -0400 Subject: [PATCH 44/45] Remove more LLVM 14 build scaffolding As titled. 
--- buildscripts/condarecipe.local/meta.yaml | 2 +- buildscripts/incremental/setup_conda_environment.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/buildscripts/condarecipe.local/meta.yaml b/buildscripts/condarecipe.local/meta.yaml index 6f4bf96a5da..2c9bf8f6c17 100644 --- a/buildscripts/condarecipe.local/meta.yaml +++ b/buildscripts/condarecipe.local/meta.yaml @@ -48,7 +48,7 @@ requirements: - setuptools - importlib_metadata # [py<39] # On channel https://anaconda.org/numba/ - - llvmlite ==0.40.0dev0llvm14 + - llvmlite >=0.40.0dev0,<0.40 run_constrained: # If TBB is present it must be at least version 2021 - tbb >=2021 # [not (aarch64 or ppc64le)] diff --git a/buildscripts/incremental/setup_conda_environment.sh b/buildscripts/incremental/setup_conda_environment.sh index 13a08697748..bc554421b3d 100755 --- a/buildscripts/incremental/setup_conda_environment.sh +++ b/buildscripts/incremental/setup_conda_environment.sh @@ -72,7 +72,7 @@ elif [[ $(uname) == Darwin ]]; then fi # Install latest correct build -$CONDA_INSTALL -c numba/label/dev "llvmlite=0.40.0dev0llvm14*" +$CONDA_INSTALL -c numba/label/dev llvmlite=0.40 # Install importlib-metadata for Python < 3.9 if [ $PYTHON \< "3.9" ]; then $CONDA_INSTALL importlib_metadata; fi From 0fec27db1a0315eec1a07d648af4eaf6fb4662a5 Mon Sep 17 00:00:00 2001 From: Siu Kwan Lam <1929845+sklam@users.noreply.github.com> Date: Mon, 13 Mar 2023 16:23:40 -0500 Subject: [PATCH 45/45] Fix llvmlite dependency in meta.yaml --- buildscripts/condarecipe.local/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildscripts/condarecipe.local/meta.yaml b/buildscripts/condarecipe.local/meta.yaml index 2c9bf8f6c17..3b3232d457a 100644 --- a/buildscripts/condarecipe.local/meta.yaml +++ b/buildscripts/condarecipe.local/meta.yaml @@ -34,7 +34,7 @@ requirements: - setuptools - importlib_metadata # [py<39] # On channel https://anaconda.org/numba/ - - llvmlite >=0.40,<0.40 + - llvmlite >=0.40.0dev0,<0.40 # TBB devel version is to match TBB libs. # 2020.3 is the last version with the "old" ABI # NOTE: 2021.1..2021.5 are API compatible for Numba's purposes.
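This fix matters because conda's version ordering places dev builds below the
corresponding release, so ``>=0.40,<0.40`` can never match ``0.40.0dev0``,
while ``>=0.40.0dev0,<0.40`` can. A sketch using conda's own ordering
(assuming ``conda`` is importable as a library, e.g. from a base environment):

```
from conda.models.version import VersionOrder

dev = VersionOrder("0.40.0dev0")
release = VersionOrder("0.40")

print(dev < release)   # True: the dev build sorts below 0.40 ...
print(dev >= release)  # False: ... so ">=0.40,<0.40" matches nothing,
                       # while ">=0.40.0dev0,<0.40" admits the dev build.
```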