diff --git a/pip/llvmlite.file b/pip/llvmlite.file index 28d8ef95165..329a9d73e75 100644 --- a/pip/llvmlite.file +++ b/pip/llvmlite.file @@ -1,5 +1,4 @@ Requires: llvm -Patch0: py3-llvmlite-14 %define source0 git+https://github.com/numba/llvmlite?obj=main/%{realversion}&export=llvmlite-%{realversion}&output=/source.tar.gz %define PipPreBuild export LLVM_CONFIG=${LLVM_ROOT}/bin/llvm-config diff --git a/pip/requirements.txt b/pip/requirements.txt index 7301f3bb315..cb9ac53c7db 100644 --- a/pip/requirements.txt +++ b/pip/requirements.txt @@ -151,7 +151,7 @@ kiwisolver==1.4.4 law==0.1.7 lazy-object-proxy==1.7.1 lizard==1.17.10 -llvmlite==778380378bb856b10d4d77f45aa9386f8de4d940 +llvmlite==v0.41.0dev0 lockfile==0.12.2 luigi==3.1.1 lxml==4.9.1 diff --git a/py3-llvmlite-14.patch b/py3-llvmlite-14.patch deleted file mode 100644 index 088c40da230..00000000000 --- a/py3-llvmlite-14.patch +++ /dev/null @@ -1,3129 +0,0 @@ -From c37e824380fec443edb24c914b1767dcff496d38 Mon Sep 17 00:00:00 2001 -From: Andre Masella -Date: Tue, 5 Apr 2022 15:22:21 -0400 -Subject: [PATCH] Update to LLVM 12-14 - -Modify llvmlite to support LLVM 11-14 and modify conda recipe to build LLVM14. -Also lift over all patches to LLVM versions as required. ---- - ...-Limit-size-of-non-GlobalValue-name.patch} | 0 - ...tch => llvm11-consecutive_registers.patch} | 0 - ...-entrypoints-in-add-TLI-mappings.ll.patch} | 0 - ...atch => llvm11-intel-D47188-svml-VF.patch} | 0 - ...o-static.patch => llvm11-lto-static.patch} | 0 - ...ing.patch => llvm11-partial-testing.patch} | 0 - ...t-Limit-size-of-non-GlobalValue-name.patch | 49 + - .../llvm12-consecutive_registers.patch | 181 ++ - conda-recipes/llvm12-lto-static.patch | 12 + - conda-recipes/llvm13-lto-static.patch | 12 + - .../llvm14-remove-use-of-clonefile.patch | 54 + - conda-recipes/llvm14-svml.patch | 2192 +++++++++++++++++ - conda-recipes/llvmdev/bld.bat | 45 +- - conda-recipes/llvmdev/build.sh | 24 +- - conda-recipes/llvmdev/meta.yaml | 33 +- - conda-recipes/llvmdev/numba-3016.ll | 80 - - conda-recipes/llvmlite/bld.bat | 5 +- - conda-recipes/llvmlite/meta.yaml | 10 +- - ffi/Makefile.freebsd | 2 +- - ffi/Makefile.osx | 4 +- - ffi/build.py | 15 +- - ffi/passmanagers.cpp | 9 +- - ffi/targets.cpp | 8 + - ffi/value.cpp | 13 +- - llvmlite/binding/passmanagers.py | 3 +- - llvmlite/tests/test_binding.py | 2 +- - 26 files changed, 2583 insertions(+), 170 deletions(-) - rename conda-recipes/{0001-Revert-Limit-size-of-non-GlobalValue-name.patch => llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch} (100%) - rename conda-recipes/{llvm_11_consecutive_registers.patch => llvm11-consecutive_registers.patch} (100%) - rename conda-recipes/{expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch => llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch} (100%) - rename conda-recipes/{intel-D47188-svml-VF.patch => llvm11-intel-D47188-svml-VF.patch} (100%) - rename conda-recipes/{llvm-lto-static.patch => llvm11-lto-static.patch} (100%) - rename conda-recipes/{partial-testing.patch => llvm11-partial-testing.patch} (100%) - create mode 100644 conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch - create mode 100644 conda-recipes/llvm12-consecutive_registers.patch - create mode 100644 conda-recipes/llvm12-lto-static.patch - create mode 100644 conda-recipes/llvm13-lto-static.patch - create mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch - create mode 100644 conda-recipes/llvm14-svml.patch - delete mode 100644 conda-recipes/llvmdev/numba-3016.ll - -diff 
--git a/conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-similarity index 100%
-rename from conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-rename to conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-diff --git a/conda-recipes/llvm_11_consecutive_registers.patch b/conda-recipes/llvm11-consecutive_registers.patch
-similarity index 100%
-rename from conda-recipes/llvm_11_consecutive_registers.patch
-rename to conda-recipes/llvm11-consecutive_registers.patch
-diff --git a/conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch b/conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
-similarity index 100%
-rename from conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
-rename to conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
-diff --git a/conda-recipes/intel-D47188-svml-VF.patch b/conda-recipes/llvm11-intel-D47188-svml-VF.patch
-similarity index 100%
-rename from conda-recipes/intel-D47188-svml-VF.patch
-rename to conda-recipes/llvm11-intel-D47188-svml-VF.patch
-diff --git a/conda-recipes/llvm-lto-static.patch b/conda-recipes/llvm11-lto-static.patch
-similarity index 100%
-rename from conda-recipes/llvm-lto-static.patch
-rename to conda-recipes/llvm11-lto-static.patch
-diff --git a/conda-recipes/partial-testing.patch b/conda-recipes/llvm11-partial-testing.patch
-similarity index 100%
-rename from conda-recipes/partial-testing.patch
-rename to conda-recipes/llvm11-partial-testing.patch
-diff --git a/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-new file mode 100644
-index 000000000..9b722d36c
---- /dev/null
-+++ b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-@@ -0,0 +1,49 @@
-+diff -ur a/lib/IR/Value.cpp b/lib/IR/Value.cpp
-+--- a/lib/IR/Value.cpp	2021-04-06 12:38:18.000000000 -0400
-++++ b/lib/IR/Value.cpp	2022-03-31 15:39:31.000000000 -0400
-+@@ -38,10 +38,6 @@
-+ 
-+ using namespace llvm;
-+ 
-+-static cl::opt<unsigned> NonGlobalValueMaxNameSize(
-+-    "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
-+-    cl::desc("Maximum size for the name of non-global values."));
-+-
-+ //===----------------------------------------------------------------------===//
-+ //                                Value Class
-+ //===----------------------------------------------------------------------===//
-+@@ -319,11 +315,6 @@
-+   if (getName() == NameRef)
-+     return;
-+ 
-+-  // Cap the size of non-GlobalValue names.
-+-  if (NameRef.size() > NonGlobalValueMaxNameSize && !isa<GlobalValue>(this))
-+-    NameRef =
-+-        NameRef.substr(0, std::max(1u, (unsigned)NonGlobalValueMaxNameSize));
-+-
-+   assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
-+ 
-+   // Get the symbol table to update for this object.
-+diff -ur a/test/Bitcode/value-with-long-name.ll b/test/Bitcode/value-with-long-name.ll
-+deleted file mode 100644
-+--- a/test/Bitcode/value-with-long-name.ll
-++++ /dev/null
-+@@ -1,18 +0,0 @@
-+-; Check the size of generated variable when no option is set
-+-; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s
-+-; CHECK-LONG: %{{[a-z]{4}[a-z]+}}
-+-
-+-; Then check we correctly cap the size of newly generated non-global values name
-+-; Force the size to be small so that the check works on release and debug build
-+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s
-+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s
-+-; CHECK-SHORT-NOT: %{{[a-z][a-z]+}}
-+-
-+-define i32 @f(i32 %a, i32 %b) {
-+-  %c = add i32 %a, %b
-+-  %d = add i32 %c, %a
-+-  %e = add i32 %d, %b
-+-  ret i32 %e
-+-}
-+-
-+-
-diff --git a/conda-recipes/llvm12-consecutive_registers.patch b/conda-recipes/llvm12-consecutive_registers.patch
-new file mode 100644
-index 000000000..cc60217bd
---- /dev/null
-+++ b/conda-recipes/llvm12-consecutive_registers.patch
-@@ -0,0 +1,181 @@
-+diff -ur a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
-+--- a/include/llvm/CodeGen/TargetLowering.h	2021-04-06 12:38:18.000000000 -0400
-++++ b/include/llvm/CodeGen/TargetLowering.h	2022-03-31 15:52:45.000000000 -0400
-+@@ -3975,7 +3975,8 @@
-+   /// must be passed in a block of consecutive registers.
-+   virtual bool
-+   functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
-+-                                            bool isVarArg) const {
-++                                            bool isVarArg,
-++                                            const DataLayout &DL) const {
-+     return false;
-+   }
-+ 
-+diff -ur a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
-+--- a/lib/CodeGen/SelectionDAG/FastISel.cpp	2021-04-06 12:38:18.000000000 -0400
-++++ b/lib/CodeGen/SelectionDAG/FastISel.cpp	2022-03-31 15:52:45.000000000 -0400
-+@@ -1087,7 +1087,7 @@
-+     if (Arg.IsByVal)
-+       FinalType = cast<PointerType>(Arg.Ty)->getElementType();
-+     bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
-+-        FinalType, CLI.CallConv, CLI.IsVarArg);
-++        FinalType, CLI.CallConv, CLI.IsVarArg, DL);
-+ 
-+     ISD::ArgFlagsTy Flags;
-+     if (Arg.IsZExt)
-+diff -ur a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	2021-04-06 12:38:18.000000000 -0400
-++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	2022-03-31 15:52:45.000000000 -0400
-+@@ -1851,7 +1851,7 @@
-+ 
-+   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
-+       I.getOperand(0)->getType(), F->getCallingConv(),
-+-      /*IsVarArg*/ false);
-++      /*IsVarArg*/ false, DL);
-+ 
-+   ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-+   if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
-+@@ -9229,7 +9229,7 @@
-+     CLI.IsTailCall = false;
-+   } else {
-+     bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
-+-        CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
-++        CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
-+     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-+       ISD::ArgFlagsTy Flags;
-+       if (NeedsRegBlock) {
-+@@ -9289,7 +9289,7 @@
-+     if (Args[i].IsByVal)
-+       FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
-+     bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
-++        FinalType, CLI.CallConv, CLI.IsVarArg, DL);
-+-        FinalType, CLI.CallConv, CLI.IsVarArg);
-+     for (unsigned Value = 0, NumValues = ValueVTs.size(); Value !=
 NumValues;
-+          ++Value) {
-+       EVT VT = ValueVTs[Value];
-+@@ -9830,7 +9830,7 @@
-+     if (Arg.hasAttribute(Attribute::ByVal))
-+       FinalType = Arg.getParamByValType();
-+     bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
-+-        FinalType, F.getCallingConv(), F.isVarArg());
-++        FinalType, F.getCallingConv(), F.isVarArg(), DL);
-+     for (unsigned Value = 0, NumValues = ValueVTs.size();
-+          Value != NumValues; ++Value) {
-+       EVT VT = ValueVTs[Value];
-+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
-+--- a/lib/Target/AArch64/AArch64ISelLowering.cpp	2021-04-06 12:38:18.000000000 -0400
-++++ b/lib/Target/AArch64/AArch64ISelLowering.cpp	2022-03-31 15:52:45.000000000 -0400
-+@@ -30,6 +30,7 @@
-+ #include "llvm/ADT/Triple.h"
-+ #include "llvm/ADT/Twine.h"
-+ #include "llvm/Analysis/VectorUtils.h"
-++#include "llvm/CodeGen/Analysis.h"
-+ #include "llvm/CodeGen/CallingConvLower.h"
-+ #include "llvm/CodeGen/MachineBasicBlock.h"
-+ #include "llvm/CodeGen/MachineFrameInfo.h"
-+@@ -16455,15 +16456,17 @@
-+ }
-+ 
-+ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
-+-    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
-+-  if (Ty->isArrayTy())
-+-    return true;
-+-
-+-  const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
-+-  if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
-+-    return true;
-++    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
-++    const DataLayout &DL) const {
-++  if (!Ty->isArrayTy()) {
-++    const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
-++    return TySize.isScalable() && TySize.getKnownMinSize() > 128;
-++  }
-+ 
-+-  return false;
-++  // All non aggregate members of the type must have the same type
-++  SmallVector<EVT> ValueVTs;
-++  ComputeValueVTs(*this, DL, Ty, ValueVTs);
-++  return is_splat(ValueVTs);
-+ }
-+ 
-+ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
-+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
-+--- a/lib/Target/AArch64/AArch64ISelLowering.h	2021-04-06 12:38:18.000000000 -0400
-++++ b/lib/Target/AArch64/AArch64ISelLowering.h	2022-03-31 15:52:45.000000000 -0400
-+@@ -770,9 +770,10 @@
-+   MachineMemOperand::Flags getTargetMMOFlags(
-+     const Instruction &I) const override;
-+ 
-+-  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
-+-                                                 CallingConv::ID CallConv,
-+-                                                 bool isVarArg) const override;
-++  bool functionArgumentNeedsConsecutiveRegisters(
-++      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
-++      const DataLayout &DL) const override;
-++
-+   /// Used for exception handling on Win64.
-+ bool needsFixedCatchObjects() const override; -+ -+diff -ur a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp -+--- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2021-04-06 12:38:18.000000000 -0400 -++++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2022-03-31 15:52:45.000000000 -0400 -+@@ -259,7 +259,7 @@ -+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); -+ -+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( -+- OrigArg.Ty, CallConv, false); -++ OrigArg.Ty, CallConv, false, DL); -+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { -+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); -+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0], -+diff -ur a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp -+--- a/lib/Target/ARM/ARMCallLowering.cpp 2021-04-06 12:38:18.000000000 -0400 -++++ b/lib/Target/ARM/ARMCallLowering.cpp 2022-03-31 15:52:45.000000000 -0400 -+@@ -218,7 +218,7 @@ -+ -+ bool NeedsConsecutiveRegisters = -+ TLI.functionArgumentNeedsConsecutiveRegisters( -+- SplitTy, F.getCallingConv(), F.isVarArg()); -++ SplitTy, F.getCallingConv(), F.isVarArg(), DL); -+ if (NeedsConsecutiveRegisters) { -+ Flags.setInConsecutiveRegs(); -+ if (i == e - 1) -+diff -ur a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp -+--- a/lib/Target/ARM/ARMISelLowering.cpp 2021-04-06 12:38:18.000000000 -0400 -++++ b/lib/Target/ARM/ARMISelLowering.cpp 2022-03-31 15:52:45.000000000 -0400 -+@@ -19269,7 +19269,8 @@ -+ /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when -+ /// passing according to AAPCS rules. -+ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( -+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { -++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, -++ const DataLayout &DL) const { -+ if (getEffectiveCallingConv(CallConv, isVarArg) != -+ CallingConv::ARM_AAPCS_VFP) -+ return false; -+diff -ur a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h -+--- a/lib/Target/ARM/ARMISelLowering.h 2021-04-06 12:38:18.000000000 -0400 -++++ b/lib/Target/ARM/ARMISelLowering.h 2022-03-31 15:52:45.000000000 -0400 -+@@ -578,7 +578,8 @@ -+ /// Returns true if an argument of type Ty needs to be passed in a -+ /// contiguous block of registers in calling convention CallConv. -+ bool functionArgumentNeedsConsecutiveRegisters( -+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; -++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, -++ const DataLayout &DL) const override; -+ -+ /// If a physical register, this returns the register that receives the -+ /// exception address on entry to an EH pad. -+diff -ur a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h -+--- a/lib/Target/PowerPC/PPCISelLowering.h 2021-04-06 12:38:18.000000000 -0400 -++++ b/lib/Target/PowerPC/PPCISelLowering.h 2022-03-31 15:52:45.000000000 -0400 -+@@ -998,7 +998,8 @@ -+ /// Returns true if an argument of type Ty needs to be passed in a -+ /// contiguous block of registers in calling convention CallConv. -+ bool functionArgumentNeedsConsecutiveRegisters( -+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { -++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, -++ const DataLayout &DL) const override { -+ // We support any array type as "consecutive" block in the parameter -+ // save area. 
The element type defines the alignment requirement and -+ // whether the argument should go in GPRs, FPRs, or VRs if available. -diff --git a/conda-recipes/llvm12-lto-static.patch b/conda-recipes/llvm12-lto-static.patch -new file mode 100644 -index 000000000..76cc55def ---- /dev/null -+++ b/conda-recipes/llvm12-lto-static.patch -@@ -0,0 +1,12 @@ -+diff -ur a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt -+--- llvm-12.0.0.src-orig/tools/lto/CMakeLists.txt 2021-04-06 12:38:18.000000000 -0400 -++++ llvm-12.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 15:46:00.000000000 -0400 -+@@ -21,7 +21,7 @@ -+ -+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports) -+ -+-add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS -++add_llvm_library(LTO INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS -+ intrinsics_gen) -+ -+ install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h -diff --git a/conda-recipes/llvm13-lto-static.patch b/conda-recipes/llvm13-lto-static.patch -new file mode 100644 -index 000000000..b8a624250 ---- /dev/null -+++ b/conda-recipes/llvm13-lto-static.patch -@@ -0,0 +1,12 @@ -+diff -ur llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt llvm-13.0.0.src/tools/lto/CMakeLists.txt -+--- llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt 2021-09-24 12:18:10.000000000 -0400 -++++ llvm-13.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 17:07:07.000000000 -0400 -+@@ -25,7 +25,7 @@ -+ set(LTO_LIBRARY_TYPE MODULE) -+ set(LTO_LIBRARY_NAME libLTO) -+ else() -+- set(LTO_LIBRARY_TYPE SHARED) -++ set(LTO_LIBRARY_TYPE STATIC) -+ set(LTO_LIBRARY_NAME LTO) -+ endif() -+ -diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch -new file mode 100644 -index 000000000..6ef9c9d61 ---- /dev/null -+++ b/conda-recipes/llvm14-remove-use-of-clonefile.patch -@@ -0,0 +1,54 @@ -+diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc -+--- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-03-14 05:44:55.000000000 -0400 -++++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-09-19 11:30:59.000000000 -0400 -+@@ -1462,6 +1462,7 @@ -+ std::error_code copy_file(const Twine &From, const Twine &To) { -+ std::string FromS = From.str(); -+ std::string ToS = To.str(); -++ /* -+ #if __has_builtin(__builtin_available) -+ if (__builtin_available(macos 10.12, *)) { -+ // Optimistically try to use clonefile() and handle errors, rather than -+@@ -1490,6 +1491,7 @@ -+ // cheaper. -+ } -+ #endif -++ */ -+ if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA)) -+ return std::error_code(); -+ return std::error_code(errno, std::generic_category()); -+diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp -+--- a/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-03-14 05:44:55.000000000 -0400 -++++ b/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-09-19 11:33:07.000000000 -0400 -+@@ -2267,15 +2267,15 @@ -+ -+ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError); -+ EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe)); -+- -++#if !defined(__APPLE__) -+ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError); -+ EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe)); -+- -++#endif -+ // Modern BSDs require root to set the sticky bit on files. -+ // AIX and Solaris without root will mask off (i.e., lose) the sticky bit -+ // on files. 
-+ #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && \ -+- !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) -++ !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__) -+ EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError); -+ EXPECT_TRUE(CheckPermissions(fs::sticky_bit)); -+ -+@@ -2297,10 +2297,12 @@ -+ EXPECT_TRUE(CheckPermissions(fs::all_perms)); -+ #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX -+ -++#if !defined(__APPLE__) -+ EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit), -+ NoError); -+ EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit)); -+ #endif -++#endif -+ } -+ -+ #ifdef _WIN32 -diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch -new file mode 100644 -index 000000000..cdce26b34 ---- /dev/null -+++ b/conda-recipes/llvm14-svml.patch -@@ -0,0 +1,2192 @@ -+From bc2dcd190b7148d04772fa7fcd18b5200b758d4a Mon Sep 17 00:00:00 2001 -+From: Ivan Butygin -+Date: Sun, 24 Jul 2022 20:31:29 +0200 -+Subject: [PATCH] Fixes vectorizer and extends SVML support -+ -+Patch was updated to fix SVML calling convention issues uncovered by llvm 10. -+In previous versions of patch SVML calling convention was selected based on -+compilation settings. So if you try to call 256bit vector function from avx512 -+code function will be called with avx512 cc which is incorrect. To fix this -+SVML cc was separated into 3 different cc for 128, 256 and 512bit vector lengths -+which are selected based on actual input vector length. -+ -+Original patch merged several fixes: -+ -+1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls -+to SVML library as it has non-standard calling conventions. So accordingly it -+has SVML calling conventions definitions and code to set CC to the vectorized -+calls. As SVML provides several implementations for the math functions we also -+took into consideration fast attribute and select more fast implementation in -+such case. This work is based on original Matt Masten's work. -+Author: Denis Nagorny -+ -+2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML -+calls by breaking down the illegal vector call instruction into multiple legal -+vector call instructions during code generation. Currently the vectorizer does -+not check legality of the generated SVML (or any VECLIB) call instructions, and -+this can lead to potential problems even during vector type legalization. This -+patch addresses this issue by adding a legality check during code generation and -+replaces the illegal SVML call with corresponding legalized instructions. 
-+(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html) -+Author: Karthik Senthil -+--- -+ .../include/llvm/Analysis/TargetLibraryInfo.h | 22 +- -+ llvm/include/llvm/AsmParser/LLToken.h | 3 + -+ llvm/include/llvm/IR/CMakeLists.txt | 4 + -+ llvm/include/llvm/IR/CallingConv.h | 5 + -+ llvm/include/llvm/IR/SVML.td | 62 +++ -+ llvm/lib/Analysis/CMakeLists.txt | 1 + -+ llvm/lib/Analysis/TargetLibraryInfo.cpp | 55 +- -+ llvm/lib/AsmParser/LLLexer.cpp | 3 + -+ llvm/lib/AsmParser/LLParser.cpp | 6 + -+ llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 2 +- -+ llvm/lib/IR/AsmWriter.cpp | 3 + -+ llvm/lib/IR/Verifier.cpp | 3 + -+ llvm/lib/Target/X86/X86CallingConv.td | 70 +++ -+ llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- -+ llvm/lib/Target/X86/X86RegisterInfo.cpp | 46 ++ -+ llvm/lib/Target/X86/X86Subtarget.h | 3 + -+ .../Transforms/Utils/InjectTLIMappings.cpp | 2 +- -+ .../Transforms/Vectorize/LoopVectorize.cpp | 269 +++++++++ -+ .../Generic/replace-intrinsics-with-veclib.ll | 4 +- -+ .../LoopVectorize/X86/svml-calls-finite.ll | 24 +- -+ .../LoopVectorize/X86/svml-calls.ll | 108 ++-- -+ .../LoopVectorize/X86/svml-legal-calls.ll | 513 ++++++++++++++++++ -+ .../LoopVectorize/X86/svml-legal-codegen.ll | 61 +++ -+ llvm/test/Transforms/Util/add-TLI-mappings.ll | 18 +- -+ llvm/utils/TableGen/CMakeLists.txt | 1 + -+ llvm/utils/TableGen/SVMLEmitter.cpp | 110 ++++ -+ llvm/utils/TableGen/TableGen.cpp | 8 +- -+ llvm/utils/TableGen/TableGenBackends.h | 1 + -+ llvm/utils/vim/syntax/llvm.vim | 1 + -+ 29 files changed, 1341 insertions(+), 70 deletions(-) -+ create mode 100644 llvm/include/llvm/IR/SVML.td -+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll -+ create mode 100644 llvm/utils/TableGen/SVMLEmitter.cpp -+ -+diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -+index 17d1e3f770c14..110ff08189867 100644 -+--- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -++++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -+@@ -39,6 +39,12 @@ struct VecDesc { -+ NotLibFunc -+ }; -+ -++enum SVMLAccuracy { -++ SVML_DEFAULT, -++ SVML_HA, -++ SVML_EP -++}; -++ -+ /// Implementation of the target library information. -+ /// -+ /// This class constructs tables that hold the target library information and -+@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl { -+ /// Return true if the function F has a vector equivalent with vectorization -+ /// factor VF. -+ bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { -+- return !getVectorizedFunction(F, VF).empty(); -++ return !getVectorizedFunction(F, VF, false).empty(); -+ } -+ -+ /// Return true if the function F has a vector equivalent with any -+@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl { -+ -+ /// Return the name of the equivalent of F, vectorized with factor VF. If no -+ /// such mapping exists, return the empty string. 
-+-  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
-++  std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const;
-++
-++  Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
-++      StringRef F, const FunctionType &FTy, const DataLayout &DL) const;
-+ 
-+   /// Set to true iff i32 parameters to library functions should have signext
-+   /// or zeroext attributes if they correspond to C-level int or unsigned int,
-+@@ -326,8 +335,13 @@ class TargetLibraryInfo {
-+   bool isFunctionVectorizable(StringRef F) const {
-+     return Impl->isFunctionVectorizable(F);
-+   }
-+-  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
-+-    return Impl->getVectorizedFunction(F, VF);
-++  std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const {
-++    return Impl->getVectorizedFunction(F, VF, IsFast);
-++  }
-++
-++  Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
-++      StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
-++    return Impl->getVectorizedFunctionCallingConv(F, FTy, DL);
-+   }
-+ 
-+   /// Tests if the function is both available and a candidate for optimized code
-+diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
-+index 78ebb35e0ea4d..3ffb57db8b18b 100644
-+--- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
-++++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
-+@@ -133,6 +133,9 @@ enum Kind {
-+   kw_fastcc,
-+   kw_coldcc,
-+   kw_intel_ocl_bicc,
-++  kw_intel_svmlcc128,
-++  kw_intel_svmlcc256,
-++  kw_intel_svmlcc512,
-+   kw_cfguard_checkcc,
-+   kw_x86_stdcallcc,
-+   kw_x86_fastcallcc,
-+diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
-+index 0498fc269b634..23bb3de41bc1a 100644
-+--- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
-++++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
-+@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86)
-+ tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore)
-+ tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve)
-+ add_public_tablegen_target(intrinsics_gen)
-++
-++set(LLVM_TARGET_DEFINITIONS SVML.td)
-++tablegen(LLVM SVML.inc -gen-svml)
-++add_public_tablegen_target(svml_gen)
-+diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
-+index fd28542465225..096eea1a8e19b 100644
-+--- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
-++++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
-+@@ -252,6 +252,11 @@ namespace CallingConv {
-+   /// M68k_INTR - Calling convention used for M68k interrupt routines.
-+   M68k_INTR = 101,
-+ 
-++  /// Intel_SVML - Calling conventions for Intel Short Math Vector Library
-++  Intel_SVML128 = 102,
-++  Intel_SVML256 = 103,
-++  Intel_SVML512 = 104,
-++
-+   /// The highest possible calling convention ID. Must be some 2^k - 1.
-+   MaxID = 1023
-+ };
-+diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td
-+new file mode 100644
-+index 0000000000000..5af710404c9d9
-+--- /dev/null
-++++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td
-+@@ -0,0 +1,62 @@
-++//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===//
-++//
-++// The LLVM Compiler Infrastructure
-++//
-++// This file is distributed under the University of Illinois Open Source
-++// License. See LICENSE.TXT for details.
-++// -++//===----------------------------------------------------------------------===// -++// -++// This file is used by TableGen to define the different typs of SVML function -++// variants used with -fveclib=SVML. -++// -++//===----------------------------------------------------------------------===// -++ -++class SvmlVariant; -++ -++def sin : SvmlVariant; -++def cos : SvmlVariant; -++def pow : SvmlVariant; -++def exp : SvmlVariant; -++def log : SvmlVariant; -++def acos : SvmlVariant; -++def acosh : SvmlVariant; -++def asin : SvmlVariant; -++def asinh : SvmlVariant; -++def atan2 : SvmlVariant; -++def atan : SvmlVariant; -++def atanh : SvmlVariant; -++def cbrt : SvmlVariant; -++def cdfnorm : SvmlVariant; -++def cdfnorminv : SvmlVariant; -++def cosd : SvmlVariant; -++def cosh : SvmlVariant; -++def erf : SvmlVariant; -++def erfc : SvmlVariant; -++def erfcinv : SvmlVariant; -++def erfinv : SvmlVariant; -++def exp10 : SvmlVariant; -++def exp2 : SvmlVariant; -++def expm1 : SvmlVariant; -++def hypot : SvmlVariant; -++def invsqrt : SvmlVariant; -++def log10 : SvmlVariant; -++def log1p : SvmlVariant; -++def log2 : SvmlVariant; -++def sind : SvmlVariant; -++def sinh : SvmlVariant; -++def sqrt : SvmlVariant; -++def tan : SvmlVariant; -++def tanh : SvmlVariant; -++ -++// TODO: SVML does not currently provide _ha and _ep variants of these fucnctions. -++// We should call the default variant of these functions in all cases instead. -++ -++// def nearbyint : SvmlVariant; -++// def logb : SvmlVariant; -++// def floor : SvmlVariant; -++// def fmod : SvmlVariant; -++// def ceil : SvmlVariant; -++// def trunc : SvmlVariant; -++// def rint : SvmlVariant; -++// def round : SvmlVariant; -+diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -+index aec84124129f4..98286e166fbe2 100644 -+--- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -++++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -+@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis -+ DEPENDS -+ intrinsics_gen -+ ${MLDeps} -++ svml_gen -+ -+ LINK_LIBS -+ ${MLLinkDeps} -+diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -+index 02923c2c7eb14..83abde28a62a4 100644 -+--- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -++++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -+@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) { -+ F->getFunctionType()); -+ } -+ -++static std::string svmlMangle(StringRef FnName, const bool IsFast) { -++ std::string FullName = FnName.str(); -++ return IsFast ? FullName : FullName + "_ha"; -++} -++ -+ /// Initialize the set of available library functions based on the specified -+ /// target triple. This should be carefully written so that a missing target -+ /// triple gets a sane set of defaults. 
-+@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
-+   }
-+   case SVML: {
-+     const VecDesc VecFuncs[] = {
-+-    #define TLI_DEFINE_SVML_VECFUNCS
-+-    #include "llvm/Analysis/VecFuncs.def"
-++    #define GET_SVML_VARIANTS
-++    #include "llvm/IR/SVML.inc"
-++    #undef GET_SVML_VARIANTS
-+     };
-+     addVectorizableFunctions(VecFuncs);
-+     break;
-+@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
-+   return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
-+ }
-+ 
-+-StringRef
-+-TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
-+-                                             const ElementCount &VF) const {
-++std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
-++                                                         const ElementCount &VF,
-++                                                         bool IsFast) const {
-++  bool FromSVML = ClVectorLibrary == SVML;
-+   F = sanitizeFunctionName(F);
-+   if (F.empty())
-+-    return F;
-++    return F.str();
-+   std::vector<VecDesc>::const_iterator I =
-+       llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
-+   while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
-+-    if (I->VectorizationFactor == VF)
-+-      return I->VectorFnName;
-++    if (I->VectorizationFactor == VF) {
-++      if (FromSVML) {
-++        return svmlMangle(I->VectorFnName, IsFast);
-++      }
-++      return I->VectorFnName.str();
-++    }
-+     ++I;
-+   }
-+-  return StringRef();
-++  return std::string();
-++}
-++
-++static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType)
-++{
-++  assert(isa<VectorType>(FType.getReturnType()));
-++  auto *VecCallRetType = cast<VectorType>(FType.getReturnType());
-++  auto TypeBitWidth = DL.getTypeSizeInBits(VecCallRetType);
-++  if (TypeBitWidth == 128) {
-++    return CallingConv::Intel_SVML128;
-++  } else if (TypeBitWidth == 256) {
-++    return CallingConv::Intel_SVML256;
-++  } else if (TypeBitWidth == 512) {
-++    return CallingConv::Intel_SVML512;
-++  } else {
-++    llvm_unreachable("Invalid vector width");
-++  }
-++  return 0; // not reachable
-++}
-++
-++Optional<CallingConv::ID>
-++TargetLibraryInfoImpl::getVectorizedFunctionCallingConv(
-++    StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
-++  if (F.startswith("__svml")) {
-++    return getSVMLCallingConv(DL, FTy);
-++  }
-++  return {};
-+ }
-+ 
-+ TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
-+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
-+index e3bf41c9721b6..4f9dccd4e0724 100644
-+--- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
-++++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
-+@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() {
-+   KEYWORD(spir_kernel);
-+   KEYWORD(spir_func);
-+   KEYWORD(intel_ocl_bicc);
-++  KEYWORD(intel_svmlcc128);
-++  KEYWORD(intel_svmlcc256);
-++  KEYWORD(intel_svmlcc512);
-+   KEYWORD(x86_64_sysvcc);
-+   KEYWORD(win64cc);
-+   KEYWORD(x86_regcallcc);
-+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
-+index 432ec151cf8ae..3bd6ee61024b8 100644
-+--- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
-++++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
-+@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
-+ ///   ::= 'ccc'
-+ ///   ::= 'fastcc'
-+ ///   ::= 'intel_ocl_bicc'
-++///   ::= 'intel_svmlcc128'
-++///   ::= 'intel_svmlcc256'
-++///   ::= 'intel_svmlcc512'
-+ ///   ::= 'coldcc'
-+ ///   ::= 'cfguard_checkcc'
-+ ///   ::= 'x86_stdcallcc'
-+@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
-+   case lltok::kw_spir_kernel:    CC = CallingConv::SPIR_KERNEL; break;
-+   case
 lltok::kw_spir_func:      CC = CallingConv::SPIR_FUNC; break;
-+   case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
-++  case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break;
-++  case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break;
-++  case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break;
-+   case lltok::kw_x86_64_sysvcc:  CC = CallingConv::X86_64_SysV; break;
-+   case lltok::kw_win64cc:        CC = CallingConv::Win64; break;
-+   case lltok::kw_webkit_jscc:    CC = CallingConv::WebKit_JS; break;
-+diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
-+index 0ff045fa787e8..175651949ef85 100644
-+--- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
-++++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
-+@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
-+   // and the exact vector width of the call operands in the
-+   // TargetLibraryInfo.
-+   const std::string TLIName =
-+-      std::string(TLI.getVectorizedFunction(ScalarName, VF));
-++      std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
-+ 
-+   LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
-+                     << ScalarName << "` and vector width " << VF << ".\n");
-+diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
-+index 179754e275b03..c4e95752c97e8 100644
-+--- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
-++++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
-+@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
-+   case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
-+   case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
-+   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
-++  case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break;
-++  case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break;
-++  case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break;
-+   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;
-+   case CallingConv::ARM_AAPCS:     Out << "arm_aapcscc"; break;
-+   case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
-+diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp
-+index 989d01e2e3950..bae7382a36e13 100644
-+--- a/llvm-14.0.6.src/lib/IR/Verifier.cpp
-++++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp
-+@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) {
-+   case CallingConv::Fast:
-+   case CallingConv::Cold:
-+   case CallingConv::Intel_OCL_BI:
-++  case CallingConv::Intel_SVML128:
-++  case CallingConv::Intel_SVML256:
-++  case CallingConv::Intel_SVML512:
-+   case CallingConv::PTX_Kernel:
-+   case CallingConv::PTX_Device:
-+     Assert(!F.isVarArg(), "Calling convention does not support varargs or "
-+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
-+index 4dd8a6cdd8982..12e65521215e4 100644
-+--- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
-++++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
-+@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[
-+   CCDelegateTo<RetCC_X86_64_C>
-+ ]>;
-+ 
-++// Intel_SVML return-value convention.
-++def RetCC_Intel_SVML : CallingConv<[
-++  // Vector types are returned in XMM0,XMM1
-++  CCIfType<[v4f32, v2f64],
-++           CCAssignToReg<[XMM0,XMM1]>>,
-++
-++  // 256-bit FP vectors
-++  CCIfType<[v8f32, v4f64],
-++           CCAssignToReg<[YMM0,YMM1]>>,
-++
-++  // 512-bit FP vectors
-++  CCIfType<[v16f32, v8f64],
-++           CCAssignToReg<[ZMM0,ZMM1]>>
-++]>;
-++
-+ // This is the return-value convention used for the entire X86 backend.
-+ let Entry = 1 in
-+ def RetCC_X86 : CallingConv<[
-+@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[
-+   // Check if this is the Intel OpenCL built-ins calling convention
-+   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
-+ 
-++  CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<RetCC_Intel_SVML>>,
-++  CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<RetCC_Intel_SVML>>,
-++  CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<RetCC_Intel_SVML>>,
-++
-+   CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
-+   CCDelegateTo<RetCC_X86_32>
-+ ]>;
-+@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[
-+   CCDelegateTo<CC_X86_32_C>
-+ ]>;
-+ 
-++// X86-64 Intel Short Vector Math Library calling convention.
-++def CC_Intel_SVML : CallingConv<[
-++
-++  // The SSE vector arguments are passed in XMM registers.
-++  CCIfType<[v4f32, v2f64],
-++           CCAssignToReg<[XMM0, XMM1, XMM2]>>,
-++
-++  // The 256-bit vector arguments are passed in YMM registers.
-++  CCIfType<[v8f32, v4f64],
-++           CCAssignToReg<[YMM0, YMM1, YMM2]>>,
-++
-++  // The 512-bit vector arguments are passed in ZMM registers.
-++  CCIfType<[v16f32, v8f64],
-++           CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>
-++]>;
-++
-++def CC_X86_32_Intr : CallingConv<[
-++  CCAssignToStack<4, 4>
-++]>;
-++
-++def CC_X86_64_Intr : CallingConv<[
-++  CCAssignToStack<8, 8>
-++]>;
-++
-+ //===----------------------------------------------------------------------===//
-+ // X86 Root Argument Calling Conventions
-+ //===----------------------------------------------------------------------===//
-+@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[
-+ let Entry = 1 in
-+ def CC_X86 : CallingConv<[
-+   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
-++  CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<CC_Intel_SVML>>,
-++  CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<CC_Intel_SVML>>,
-++  CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<CC_Intel_SVML>>,
-+   CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
-+   CCDelegateTo<CC_X86_32>
-+ ]>;
-+@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
-+                                                (sequence "R%u", 12, 15))>;
-+ def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
-+                                                (sequence "XMM%u", 8, 15))>;
-++
-++// SVML calling convention
-++def CSR_32_Intel_SVML        : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>;
-++def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML,
-++                                                K4, K5, K6, K7)>;
-++
-++def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>;
-++
-++def CSR_64_Intel_SVML       : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                               (sequence "XMM%u", 8, 15))>;
-++def CSR_Win64_Intel_SVML    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                               (sequence "XMM%u", 6, 15))>;
-++
-++def CSR_64_Intel_SVML_AVX    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                                (sequence "YMM%u", 8, 15))>;
-++def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                                (sequence "YMM%u", 6, 15))>;
-++
-++def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                                (sequence "ZMM%u", 16, 31),
-++                                                K4, K5, K6, K7)>;
-++def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
-++                                                   (sequence "ZMM%u", 6, 21),
-++                                                   K4, K5, K6, K7)>;
-+diff --git
 a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
-+index 8bb7e81e19bbd..1780ce3fc6467 100644
-+--- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
-++++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
-+@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
-+   // FIXME: Only some x86_32 calling conventions support AVX512.
-+   if (Subtarget.useAVX512Regs() &&
-+       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
-+-                     CallConv == CallingConv::Intel_OCL_BI)))
-++                     CallConv == CallingConv::Intel_OCL_BI ||
-++                     CallConv == CallingConv::Intel_SVML512)))
-+     VecVT = MVT::v16f32;
-+   else if (Subtarget.hasAVX())
-+     VecVT = MVT::v8f32;
-+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
-+index 130cb61cdde24..9eec3b25ca9f2 100644
-+--- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
-++++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
-+@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
-+   }
-+ }
-+ 
-++namespace {
-++std::pair<const uint32_t *, const MCPhysReg *> getSVMLRegMaskAndSaveList(
-++    bool Is64Bit, bool IsWin64, CallingConv::ID CC) {
-++  assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512);
-++  unsigned Abi = CC - CallingConv::Intel_SVML128 ; // 0 - 128, 1 - 256, 2 - 512
-++
-++  const std::pair<const uint32_t *, const MCPhysReg *> Abi64[] = {
-++      std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList),
-++      std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList),
-++      std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList),
-++  };
-++
-++  const std::pair<const uint32_t *, const MCPhysReg *> AbiWin64[] = {
-++      std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList),
-++      std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList),
-++      std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList),
-++  };
-++
-++  const std::pair<const uint32_t *, const MCPhysReg *> Abi32[] = {
-++      std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
-++      std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
-++      std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList),
-++  };
-++
-++  if (Is64Bit) {
-++    if (IsWin64) {
-++      return AbiWin64[Abi];
-++    } else {
-++      return Abi64[Abi];
-++    }
-++  } else {
-++    return Abi32[Abi];
-++  }
-++}
-++}
-++
-+ const MCPhysReg *
-+ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-+   assert(MF && "MachineFunction required");
-+@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-+       return CSR_64_Intel_OCL_BI_SaveList;
-+     break;
-+   }
-++  case CallingConv::Intel_SVML128:
-++  case CallingConv::Intel_SVML256:
-++  case CallingConv::Intel_SVML512: {
-++    return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second;
-++  }
-+   case CallingConv::HHVM:
-+     return CSR_64_HHVM_SaveList;
-+   case CallingConv::X86_RegCall:
-+@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
-+       return CSR_64_Intel_OCL_BI_RegMask;
-+     break;
-+   }
-++  case CallingConv::Intel_SVML128:
-++  case CallingConv::Intel_SVML256:
-++  case CallingConv::Intel_SVML512: {
-++    return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first;
-++  }
-+   case CallingConv::HHVM:
-+     return CSR_64_HHVM_RegMask;
-+   case CallingConv::X86_RegCall:
-+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
-+index
 5d773f0c57dfb..6bdf5bc6f3fe9 100644
-+--- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
-++++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
-+@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
-+     case CallingConv::X86_ThisCall:
-+     case CallingConv::X86_VectorCall:
-+     case CallingConv::Intel_OCL_BI:
-++    case CallingConv::Intel_SVML128:
-++    case CallingConv::Intel_SVML256:
-++    case CallingConv::Intel_SVML512:
-+       return isTargetWin64();
-+     // This convention allows using the Win64 convention on other targets.
-+     case CallingConv::Win64:
-+diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
-+index 047bf5569ded3..59897785f156c 100644
-+--- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
-++++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
-+@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
-+ 
-+   auto AddVariantDecl = [&](const ElementCount &VF) {
-+     const std::string TLIName =
-+-        std::string(TLI.getVectorizedFunction(ScalarName, VF));
-++        std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
-+     if (!TLIName.empty()) {
-+       std::string MangledName =
-+           VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
-+diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
-+index 46ff0994e04e7..f472af5e1a835 100644
-+--- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
-++++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
-+@@ -712,6 +712,27 @@ class InnerLoopVectorizer {
-+   virtual void printDebugTracesAtStart(){};
-+   virtual void printDebugTracesAtEnd(){};
-+ 
-++  /// Check legality of given SVML call instruction \p VecCall generated for
-++  /// scalar call \p Call. If illegal then the appropriate legal instruction
-++  /// is returned.
-++  Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call);
-++
-++  /// Returns the legal VF for a call instruction \p CI using TTI information
-++  /// and vector type.
-++  ElementCount getLegalVFForCall(CallInst *CI);
-++
-++  /// Partially vectorize a given call \p Call by breaking it down into multiple
-++  /// calls of \p LegalCall, decided by the variant VF \p LegalVF.
-++  Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall,
-++                              unsigned LegalVF);
-++
-++  /// Generate shufflevector instruction for a vector value \p V based on the
-++  /// current \p Part and a smaller VF \p LegalVF.
-++  Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part);
-++
-++  /// Combine partially vectorized calls stored in \p CallResults.
-++  Value *combinePartialVecCalls(SmallVectorImpl<Value *> &CallResults);
-++
-+   /// The original loop.
-+   Loop *OrigLoop;
-+ 
-+@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) {
-+   return !CInt || CInt->isZero();
-+ }
-+ 
-++static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL,
-++                                         const TargetLibraryInfo &TLI) {
-++  Function *VectorF = CI.getCalledFunction();
-++  FunctionType *FTy = VectorF->getFunctionType();
-++  StringRef VFName = VectorF->getName();
-++  auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL);
-++  if (CC) {
-++    CI.setCallingConv(*CC);
-++  }
-++}
-++
-+ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
-+                                                VPUser &ArgOperands,
-+                                                VPTransformState &State) {
-+@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
-+       if (isa<FPMathOperator>(V))
-+         V->copyFastMathFlags(CI);
-+ 
-++      const DataLayout &DL = V->getModule()->getDataLayout();
-++      setVectorFunctionCallingConv(*V, DL, *TLI);
-++
-++      // Perform legalization of SVML call instruction only if original call
-++      // was not Intrinsic
-++      if (!UseVectorIntrinsic &&
-++          (V->getCalledFunction()->getName()).startswith("__svml")) {
-++        // assert((V->getCalledFunction()->getName()).startswith("__svml"));
-++        LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump());
-++        auto *LegalV = cast<CallInst>(legalizeSVMLCall(V, CI));
-++        LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: ";
-++                   LegalV->dump());
-++        State.set(Def, LegalV, Part);
-++        addMetadata(LegalV, &I);
-++      } else {
-+     State.set(Def, V, Part);
-+     addMetadata(V, &I);
-++      }
-++  }
-++}
-++
-++//===----------------------------------------------------------------------===//
-++// Implementation of functions for SVML vector call legalization.
-++//===----------------------------------------------------------------------===//
-++//
-++// Unlike other VECLIBs, SVML needs to be used with target-legal
-++// vector types. Otherwise, link failures and/or runtime failures
-++// will occur. A motivating example could be -
-++//
-++//   double *a;
-++//   float *b;
-++//   #pragma clang loop vectorize_width(8)
-++//   for(i = 0; i < N; ++i) {
-++//     a[i] = sin(i);   // Legal SVML VF must be 4 or below on AVX
-++//     b[i] = cosf(i);  // VF can be 8 on AVX since 8 floats can fit in YMM
-++//   }
-++//
-++// Current implementation of vector code generation in LV is
-++// driven based on a single VF (in InnerLoopVectorizer::VF). This
-++// inhibits the flexibility of adjusting/choosing different VF
-++// for different instructions.
-++//
-++// Due to this limitation it is much more straightforward to
-++// first generate the illegal sin8 (svml_sin8 for SVML vector
-++// library) call and then legalize it than trying to avoid
-++// generating illegal code from the beginning.
-++//
-++// A solution for this problem is to check legality of the
-++// call instruction right after generating it in vectorizer and
-++// if it is illegal we split the call arguments and issue multiple
-++// calls to match the legal VF. This is demonstrated currently for
-++// the SVML vector library calls (non-intrinsic version only).
-++//
-++// Future directions and extensions:
-++// 1) This legalization example shows us that a good direction
-++//    for the VPlan framework would be to model the vector call
-++//    instructions in a way that legal VF for each call is chosen
-++//    correctly within vectorizer and illegal code generation is
-++//    avoided.
-++// 2) This logic can also be extended to general vector functions
-++//    i.e. legalization OpenMP decalre simd functions.
 The
-++//    requirements needed for this will be documented soon.
-++
-++Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall,
-++                                             CallInst *Call) {
-++  ElementCount LegalVF = getLegalVFForCall(VecCall);
-++
-++  assert(LegalVF.getKnownMinValue() > 1 &&
-++         "Legal VF for SVML call must be greater than 1 to vectorize");
-++
-++  if (LegalVF == VF)
-++    return VecCall;
-++  else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue())
-++    // TODO: handle case when we are underfilling vectors
-++    return VecCall;
-++
-++  // Legal VF for this SVML call is smaller than chosen VF, break it down into
-++  // smaller call instructions
-++
-++  // Convert args, types and return type to match legal VF
-++  SmallVector<Type *, 4> NewTys;
-++  SmallVector<Value *, 4> NewArgs;
-++
-++  for (Value *ArgOperand : Call->args()) {
-++    Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF);
-++    NewTys.push_back(Ty);
-++    NewArgs.push_back(UndefValue::get(Ty));
-+   }
-++
-++  // Construct legal vector function
-++  const VFShape Shape =
-++      VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/);
-++  Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape);
-++  assert(LegalVectorF != nullptr && "Can't create legal vector function.");
-++
-++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump());
-++
-++  SmallVector<OperandBundleDef, 1> OpBundles;
-++  Call->getOperandBundlesAsDefs(OpBundles);
-++  auto LegalV = std::unique_ptr<CallInst>(CallInst::Create(LegalVectorF, NewArgs, OpBundles));
-++
-++  if (isa<FPMathOperator>(LegalV))
-++    LegalV->copyFastMathFlags(Call);
-++
-++  const DataLayout &DL = VecCall->getModule()->getDataLayout();
-++  // Set SVML calling conventions
-++  setVectorFunctionCallingConv(*LegalV, DL, *TLI);
-++
-++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump());
-++
-++  Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue());
-++
-++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump());
-++
-++  // Remove the illegal call from Builder
-++  VecCall->eraseFromParent();
-++
-++  return LegalizedCall;
-++}
-++
-++ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) {
-++  const DataLayout DL = CI->getModule()->getDataLayout();
-++  FunctionType *CallFT = CI->getFunctionType();
-++  // All functions that need legalization should have a vector return type.
-++  // This is true for all SVML functions that are currently supported.
-++  assert(isa<VectorType>(CallFT->getReturnType()) &&
-++         "Return type of call that needs legalization is not a vector.");
-++  auto *VecCallRetType = cast<VectorType>(CallFT->getReturnType());
-++  Type *ElemType = VecCallRetType->getElementType();
-++
-++  unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType);
-++  unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
-++  unsigned LegalVF = VectorBitWidth / TypeBitWidth;
-++
-++  LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n");
-++  LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n");
-++  LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth
-++                    << "\n");
-++  LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n");
-++
-++  return ElementCount::getFixed(LegalVF);
-++}
-++
-++// Partial vectorization of a call instruction is achieved by making clones of
-++// \p LegalCall and overwriting its argument operands with shufflevector
-++// equivalent decided based on \p LegalVF and current Part being filled.
-++Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call,
-++                                                 CallInst *LegalCall,
-++                                                 unsigned LegalVF) {
-++  unsigned NumParts = VF.getKnownMinValue() / LegalVF;
-++  LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n");
-++  SmallVector<Value *> CallResults;
-++
-++  for (unsigned Part = 0; Part < NumParts; ++Part) {
-++    auto *ClonedCall = cast<CallInst>(LegalCall->clone());
-++
-++    // Update the arg operand of cloned call to shufflevector
-++    for (unsigned i = 0, ie = Call->arg_size(); i != ie; ++i) {
-++      auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part);
-++      ClonedCall->setArgOperand(i, NewOp);
-++    }
-++
-++    LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump());
-++
-++    auto *PartialVecCall = Builder.Insert(ClonedCall);
-++    CallResults.push_back(PartialVecCall);
-++  }
-++
-++  return combinePartialVecCalls(CallResults);
-++}
-++
-++Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF,
-++                                                 unsigned Part) {
-++  // Example:
-++  // Consider the following vector code -
-++  // %1 = sitofp <4 x i32> %0 to <4 x double>
-++  // %2 = call <4 x double> @__svml_sin4(<4 x double> %1)
-++  //
-++  // If the LegalVF is 2, we partially vectorize the sin4 call by invoking
-++  // generateShuffleValue on the operand %1
-++  // If Part = 1, output value is -
-++  // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-++  // and if Part = 2, output is -
-++  // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> <i32 2, i32 3>
-++
-++  assert(isa<VectorType>(V->getType()) &&
-++         "Cannot generate shuffles for non-vector values.");
-++  SmallVector<int> ShuffleMask;
-++  Value *Undef = UndefValue::get(V->getType());
-++
-++  unsigned ElemIdx = Part * LegalVF;
-++
-++  for (unsigned K = 0; K < LegalVF; K++)
-++    ShuffleMask.push_back(static_cast<int>(ElemIdx + K));
-++
-++  auto *ShuffleInst =
-++      Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle");
-++
-++  return ShuffleInst;
-++}
-++
-++// Results of the calls executed by smaller legal call instructions must be
-++// combined to match the original VF for later use. This is done by constructing
-++// shufflevector instructions in a cumulative fashion.
-++Value *InnerLoopVectorizer::combinePartialVecCalls(
-++    SmallVectorImpl<Value *> &CallResults) {
-++  assert(isa<VectorType>(CallResults[0]->getType()) &&
-++         "Cannot combine calls with non-vector results.");
-++  auto *CallType = cast<VectorType>(CallResults[0]->getType());
-++
-++  Value *CombinedShuffle;
-++  unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2;
-++  unsigned NumRegs = CallResults.size();
-++
-++  assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) &&
-++         "Number of partial vector calls to combine must be a power of 2 "
-++         "(at least 2^1)");
-++
-++  while (NumRegs > 1) {
-++    for (unsigned I = 0; I < NumRegs; I += 2) {
-++      SmallVector<int> ShuffleMask;
-++      for (unsigned J = 0; J < NumElems; J++)
-++        ShuffleMask.push_back(static_cast<int>(J));
-++
-++      CombinedShuffle = Builder.CreateShuffleVector(
-++          CallResults[I], CallResults[I + 1], ShuffleMask, "combined");
-++      LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:";
-++                 CombinedShuffle->dump());
-++      CallResults.push_back(CombinedShuffle);
-++    }
-++
-++    SmallVectorImpl<Value *>::iterator Start = CallResults.begin();
-++    SmallVectorImpl<Value *>::iterator End = Start + NumRegs;
-++    CallResults.erase(Start, End);
-++
-++    NumElems *= 2;
-++    NumRegs /= 2;
-++  }
-++
-++  return CombinedShuffle;
-+ }
-+ 
-+ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
-+diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
-+index df8b7c498bd00..63a36549f18fd 100644
-+--- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
-++++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
-+@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
-+ define <4 x double> @exp_v4(<4 x double> %in) {
-+ ; SVML-LABEL: define {{[^@]+}}@exp_v4
-+ ; SVML-SAME: (<4 x double> [[IN:%.*]]) {
-+-; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
-++; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]])
-+ ; SVML-NEXT: ret <4 x double> [[TMP1]]
-+ ;
-+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
-+@@ -37,7 +37,7 @@ declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
-+ define <4 x float> @exp_f32(<4 x float> %in) {
-+ ; SVML-LABEL: define {{[^@]+}}@exp_f32
-+ ; SVML-SAME: (<4 x float> [[IN:%.*]]) {
-+-; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
-++; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]])
-+ ; SVML-NEXT: ret <4 x float> [[TMP1]]
-+ ;
-+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
-+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
-+index a6e191c3d6923..d6e2e11106949 100644
-+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
-++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
-+@@ -39,7 +39,8 @@ for.end: ; preds = %for.body
-+ declare double @__exp_finite(double) #0
-+ 
-+ ; CHECK-LABEL: @exp_f64
-+-; CHECK: <4 x double> @__svml_exp4
-++; CHECK: <2 x double> @__svml_exp2
-++; CHECK: <2 x double> @__svml_exp2
-+ ; CHECK: ret
-+ define void @exp_f64(double* nocapture %varray) {
-+ entry:
-+@@ -99,7 +100,8 @@ for.end: ; preds = %for.body
-+ declare double @__log_finite(double) #0
-+ 
-+ ; CHECK-LABEL: @log_f64
-+-; CHECK: <4 x double> @__svml_log4
-++; CHECK: <2 x double> @__svml_log2
-++; CHECK: <2 x double> @__svml_log2
-+ ; CHECK: ret
-+ define void @log_f64(double*
nocapture %varray) { -+ entry: -+@@ -159,7 +161,8 @@ for.end: ; preds = %for.body -+ declare double @__pow_finite(double, double) #0 -+ -+ ; CHECK-LABEL: @pow_f64 -+-; CHECK: <4 x double> @__svml_pow4 -++; CHECK: <2 x double> @__svml_pow2 -++; CHECK: <2 x double> @__svml_pow2 -+ ; CHECK: ret -+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -+ entry: -+@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0 -+ -+ define void @exp2f_finite(float* nocapture %varray) { -+ ; CHECK-LABEL: @exp2f_finite( -+-; CHECK: call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}}) -++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) -++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0 -+ -+ define void @exp2_finite(double* nocapture %varray) { -+ ; CHECK-LABEL: @exp2_finite( -+-; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) -++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) -++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -276,7 +281,8 @@ for.end: ; preds = %for.body -+ declare double @__log2_finite(double) #0 -+ -+ ; CHECK-LABEL: @log2_f64 -+-; CHECK: <4 x double> @__svml_log24 -++; CHECK: <2 x double> @__svml_log22 -++; CHECK: <2 x double> @__svml_log22 -+ ; CHECK: ret -+ define void @log2_f64(double* nocapture %varray) { -+ entry: -+@@ -333,7 +339,8 @@ for.end: ; preds = %for.body -+ declare double @__log10_finite(double) #0 -+ -+ ; CHECK-LABEL: @log10_f64 -+-; CHECK: <4 x double> @__svml_log104 -++; CHECK: <2 x double> @__svml_log102 -++; CHECK: <2 x double> @__svml_log102 -+ ; CHECK: ret -+ define void @log10_f64(double* nocapture %varray) { -+ entry: -+@@ -390,7 +397,8 @@ for.end: ; preds = %for.body -+ declare double @__sqrt_finite(double) #0 -+ -+ ; CHECK-LABEL: @sqrt_f64 -+-; CHECK: <4 x double> @__svml_sqrt4 -++; CHECK: <2 x double> @__svml_sqrt2 -++; CHECK: <2 x double> @__svml_sqrt2 -+ ; CHECK: ret -+ define void @sqrt_f64(double* nocapture %varray) { -+ entry: -+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -+index 42c280df6ad02..088bbdcf1aa4a 100644 -+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -+@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0 -+ -+ define void @sin_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @sin_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -71,7 +71,7 @@ for.end: -+ -+ define void @sin_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @sin_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -94,7 +94,7 @@ for.end: -+ -+ define void @sin_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @sin_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x 
double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -117,7 +117,7 @@ for.end: -+ -+ define void @sin_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @sin_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -140,7 +140,7 @@ for.end: -+ -+ define void @cos_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @cos_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -163,7 +163,7 @@ for.end: -+ -+ define void @cos_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @cos_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -186,7 +186,7 @@ for.end: -+ -+ define void @cos_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @cos_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -209,7 +209,7 @@ for.end: -+ -+ define void @cos_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @cos_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -232,7 +232,7 @@ for.end: -+ -+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -+ ; CHECK-LABEL: @pow_f64( -+-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -257,7 +257,7 @@ for.end: -+ -+ define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { -+ ; CHECK-LABEL: @pow_f64_intrinsic( -+-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -282,7 +282,7 @@ for.end: -+ -+ define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -+ ; CHECK-LABEL: @pow_f32( -+-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -307,7 +307,7 @@ for.end: -+ -+ define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { -+ ; CHECK-LABEL: @pow_f32_intrinsic( -+-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+ ; 
CHECK: ret void -+ ; -+ entry: -+@@ -332,7 +332,7 @@ for.end: -+ -+ define void @exp_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @exp_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -355,7 +355,7 @@ for.end: -+ -+ define void @exp_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @exp_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -378,7 +378,7 @@ for.end: -+ -+ define void @exp_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @exp_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -401,7 +401,7 @@ for.end: -+ -+ define void @exp_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @exp_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -424,7 +424,7 @@ for.end: -+ -+ define void @log_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @log_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -447,7 +447,7 @@ for.end: -+ -+ define void @log_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @log_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -470,7 +470,7 @@ for.end: -+ -+ define void @log_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @log_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -493,7 +493,7 @@ for.end: -+ -+ define void @log_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @log_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -516,7 +516,7 @@ for.end: -+ -+ define void @log2_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @log2_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -539,7 +539,7 @@ for.end: -+ -+ define void @log2_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @log2_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -562,7 +562,7 @@ for.end: -+ -+ define void 
@log2_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @log2_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -585,7 +585,7 @@ for.end: -+ -+ define void @log2_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @log2_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -608,7 +608,7 @@ for.end: -+ -+ define void @log10_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @log10_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -631,7 +631,7 @@ for.end: -+ -+ define void @log10_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @log10_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -654,7 +654,7 @@ for.end: -+ -+ define void @log10_f64_intrinsic(double* nocapture %varray) { -+ ; CHECK-LABEL: @log10_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -677,7 +677,7 @@ for.end: -+ -+ define void @log10_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @log10_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -700,7 +700,7 @@ for.end: -+ -+ define void @sqrt_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @sqrt_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -723,7 +723,7 @@ for.end: -+ -+ define void @sqrt_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @sqrt_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -746,7 +746,7 @@ for.end: -+ -+ define void @exp2_f64(double* nocapture %varray) { -+ ; CHECK-LABEL: @exp2_f64( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -769,7 +769,7 @@ for.end: -+ -+ define void @exp2_f32(float* nocapture %varray) { -+ ; CHECK-LABEL: @exp2_f32( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -792,7 +792,7 @@ for.end: -+ -+ define void @exp2_f64_intrinsic(double* 
nocapture %varray) { -+ ; CHECK-LABEL: @exp2_f64_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -815,7 +815,7 @@ for.end: -+ -+ define void @exp2_f32_intrinsic(float* nocapture %varray) { -+ ; CHECK-LABEL: @exp2_f32_intrinsic( -+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) -+ ; CHECK: ret void -+ ; -+ entry: -+@@ -836,4 +836,44 @@ for.end: -+ ret void -+ } -+ -++; CHECK-LABEL: @atan2_finite -++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -++; CHECK: ret -++ -++declare double @__atan2_finite(double, double) local_unnamed_addr #0 -++ -++define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { -++entry: -++ br label %for.cond1.preheader -++ -++for.cond1.preheader: ; preds = %for.inc7, %entry -++ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] -++ %0 = trunc i64 %indvars.iv19 to i32 -++ %conv = sitofp i32 %0 to double -++ br label %for.body3 -++ -++for.body3: ; preds = %for.body3, %for.cond1.preheader -++ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] -++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -++ %1 = trunc i64 %indvars.iv.next to i32 -++ %conv4 = sitofp i32 %1 to double -++ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) -++ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv -++ store double %call, double* %arrayidx6, align 8 -++ %exitcond = icmp eq i64 %indvars.iv.next, 100 -++ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 -++ -++for.inc7: ; preds = %for.body3 -++ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 -++ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 -++ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader -++ -++for.end9: ; preds = %for.inc7 -++ ret void -++} -++ -+ attributes #0 = { nounwind readnone } -++!5 = distinct !{!5, !6, !7} -++!6 = !{!"llvm.loop.vectorize.width", i32 8} -++!7 = !{!"llvm.loop.vectorize.enable", i1 true} -+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -+new file mode 100644 -+index 0000000000000..326c763994343 -+--- /dev/null -++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -+@@ -0,0 +1,513 @@ -++; Check legalization of SVML calls, including intrinsic versions (like @llvm..). 
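-++; (Illustrative note, inferred from the legalization code above rather than
-++; stated in the original test: with -mattr=avx the widest legal fixed vector
-++; is 256 bits, so at the forced VF of 8 each f64 call below legalizes to two
-++; <4 x double> __svml_*_ha calls, while an <8 x float> call fits a single
-++; 256-bit register and is left whole.)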
-++ -++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s -++ -++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -++target triple = "x86_64-unknown-linux-gnu" -++ -++declare double @sin(double) #0 -++declare float @sinf(float) #0 -++declare double @llvm.sin.f64(double) #0 -++declare float @llvm.sin.f32(float) #0 -++ -++declare double @cos(double) #0 -++declare float @cosf(float) #0 -++declare double @llvm.cos.f64(double) #0 -++declare float @llvm.cos.f32(float) #0 -++ -++declare double @pow(double, double) #0 -++declare float @powf(float, float) #0 -++declare double @llvm.pow.f64(double, double) #0 -++declare float @llvm.pow.f32(float, float) #0 -++ -++declare double @exp(double) #0 -++declare float @expf(float) #0 -++declare double @llvm.exp.f64(double) #0 -++declare float @llvm.exp.f32(float) #0 -++ -++declare double @log(double) #0 -++declare float @logf(float) #0 -++declare double @llvm.log.f64(double) #0 -++declare float @llvm.log.f32(float) #0 -++ -++ -++define void @sin_f64(double* nocapture %varray) { -++; CHECK-LABEL: @sin_f64( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @sin(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @sin_f32(float* nocapture %varray) { -++; CHECK-LABEL: @sin_f32( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @sinf(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @sin_f64_intrinsic(double* nocapture %varray) { -++; CHECK-LABEL: @sin_f64_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @llvm.sin.f64(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @sin_f32_intrinsic(float* 
nocapture %varray) { -++; CHECK-LABEL: @sin_f32_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @llvm.sin.f32(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @cos_f64(double* nocapture %varray) { -++; CHECK-LABEL: @cos_f64( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @cos(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @cos_f32(float* nocapture %varray) { -++; CHECK-LABEL: @cos_f32( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @cosf(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @cos_f64_intrinsic(double* nocapture %varray) { -++; CHECK-LABEL: @cos_f64_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @llvm.cos.f64(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @cos_f32_intrinsic(float* nocapture %varray) { -++; CHECK-LABEL: @cos_f32_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail 
call float @llvm.cos.f32(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -++; CHECK-LABEL: @pow_f64( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -++ %tmp1 = load double, double* %arrayidx, align 4 -++ %tmp2 = tail call double @pow(double %conv, double %tmp1) -++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %tmp2, double* %arrayidx2, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { -++; CHECK-LABEL: @pow_f64_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -++ %tmp1 = load double, double* %arrayidx, align 4 -++ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) -++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %tmp2, double* %arrayidx2, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -++; CHECK-LABEL: @pow_f32( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -++ %tmp1 = load float, float* %arrayidx, align 4 -++ %tmp2 = tail call float @powf(float %conv, float %tmp1) -++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %tmp2, float* %arrayidx2, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { -++; CHECK-LABEL: @pow_f32_intrinsic( -++; CHECK: [[TMP1:%.*]] = 
call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -++ %tmp1 = load float, float* %arrayidx, align 4 -++ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) -++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %tmp2, float* %arrayidx2, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @exp_f64(double* nocapture %varray) { -++; CHECK-LABEL: @exp_f64( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @exp(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @exp_f32(float* nocapture %varray) { -++; CHECK-LABEL: @exp_f32( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @expf(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @exp_f64_intrinsic(double* nocapture %varray) { -++; CHECK-LABEL: @exp_f64_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @llvm.exp.f64(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @exp_f32_intrinsic(float* nocapture %varray) { -++; CHECK-LABEL: @exp_f32_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = 
trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @llvm.exp.f32(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @log_f64(double* nocapture %varray) { -++; CHECK-LABEL: @log_f64( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @log(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @log_f32(float* nocapture %varray) { -++; CHECK-LABEL: @log_f32( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @logf(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @log_f64_intrinsic(double* nocapture %varray) { -++; CHECK-LABEL: @log_f64_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to double -++ %call = tail call double @llvm.log.f64(double %conv) -++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -++ store double %call, double* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: -++ ret void -++} -++ -++define void @log_f32_intrinsic(float* nocapture %varray) { -++; CHECK-LABEL: @log_f32_intrinsic( -++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -++; CHECK: ret void -++; -++entry: -++ br label %for.body -++ -++for.body: -++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -++ %tmp = trunc i64 %iv to i32 -++ %conv = sitofp i32 %tmp to float -++ %call = tail call float @llvm.log.f32(float %conv) -++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -++ store float %call, float* %arrayidx, align 4 -++ %iv.next = add nuw nsw i64 %iv, 1 -++ %exitcond = icmp eq i64 %iv.next, 1000 -++ br i1 %exitcond, label %for.end, label %for.body -++ -++for.end: 
-++ ret void
-++}
-++
-++attributes #0 = { nounwind readnone }
-++
-+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
-+new file mode 100644
-+index 0000000000000..9422653445dc2
-+--- /dev/null
-++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
-+@@ -0,0 +1,61 @@
-++; Check that vector codegen splits an illegal sin8 call into two sin4 calls on AVX for the double datatype.
-++; The C code used to generate this test:
-++
-++; #include <math.h>
-++;
-++; void foo(double *a, int N){
-++; int i;
-++; #pragma clang loop vectorize_width(8)
-++; for (i=0;i<N;i++){
-++;   a[i] = sin(i);
-++; }
-++; }
-++
-++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -mattr=avx -S < %s | FileCheck %s
-++
-++; CHECK: [[I1:%.*]] = sitofp <8 x i32> [[I0:%.*]] to <8 x double>
-++; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-++; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]])
-++; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-++; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]])
-++; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-++; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8
-++
-++
-++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-++target triple = "x86_64-unknown-linux-gnu"
-++
-++; Function Attrs: nounwind uwtable
-++define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 {
-++entry:
-++  %cmp5 = icmp sgt i32 %N, 0
-++  br i1 %cmp5, label %for.body.preheader, label %for.end
-++
-++for.body.preheader:                               ; preds = %entry
-++  %wide.trip.count = zext i32 %N to i64
-++  br label %for.body
-++
-++for.body:                                         ; preds = %for.body, %for.body.preheader
-++  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-++  %0 = trunc i64 %indvars.iv to i32
-++  %conv = sitofp i32 %0 to double
-++  %call = tail call fast double @sin(double %conv) #2
-++  %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv
-++  store double %call, double* %arrayidx, align 8, !tbaa !2
-++  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-++  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
-++  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
-++
-++for.end:                                          ; preds = %for.body, %entry
-++  ret void
-++}
-++
-++; Function Attrs: nounwind
-++declare dso_local double @sin(double) local_unnamed_addr #1
-++
-++!2 = !{!3, !3, i64 0}
-++!3 = !{!"double", !4, i64 0}
-++!4 = !{!"omnipotent char", !5, i64 0}
-++!5 = !{!"Simple C/C++ TBAA"}
-++!6 = distinct !{!6, !7}
-++!7 = !{!"llvm.loop.vectorize.width", i32 8}
-+diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
-+index e8c83c4d9bd1f..615fdc29176a2 100644
-+--- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
-++++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
-+@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu"
-+ 
-+ ; COMMON-LABEL: @llvm.compiler.used = appending global
-+ ; SVML-SAME: [6 x i8*] [
-+-; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*),
-+-; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*),
-+-; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*),
-+-; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*),
-+-; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
-+-; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
-++; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*),
-++; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*),
-++; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8_ha to i8*),
-++; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*),
-++; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*),
-++; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*)
-+ ; MASSV-SAME: [2 x i8*] [
-+ ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*),
-+ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*)
-+@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0
-+ attributes #0 = { nounwind readnone }
-+ 
-+ ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"=
-+-; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2),
-+-; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4),
-+-; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" }
-++; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2_ha),
-++; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4_ha),
-++; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" }
-+ 
-+ ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"=
-+ ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" }
-+diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
-+index 97df6a55d1b59..199e0285c9e5d 100644
-+--- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
-++++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
-+@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM
-+   SearchableTableEmitter.cpp
-+   SubtargetEmitter.cpp
-+   SubtargetFeatureInfo.cpp
-++  SVMLEmitter.cpp
-+   TableGen.cpp
-+   Types.cpp
-+   X86DisassemblerTables.cpp
-+diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
-+new file mode 100644
-+index 0000000000000..a5aeea48db28b
-+--- /dev/null
-++++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
-+@@ -0,0 +1,110 @@
-++//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===//
-++//
-++// The LLVM Compiler Infrastructure
-++//
-++// This file is distributed under the University of Illinois Open Source
-++// License. See LICENSE.TXT for details.
-++//
-++//===----------------------------------------------------------------------===//
-++//
-++// This tablegen backend emits the scalar to svml function map for TLI.
-++//
-++//===----------------------------------------------------------------------===//
-++
-++#include "CodeGenTarget.h"
-++#include "llvm/Support/Format.h"
-++#include "llvm/TableGen/Error.h"
-++#include "llvm/TableGen/Record.h"
-++#include "llvm/TableGen/TableGenBackend.h"
-++#include <map>
-++#include <vector>
-++
-++using namespace llvm;
-++
-++#define DEBUG_TYPE "SVMLVariants"
-++#include "llvm/Support/Debug.h"
-++
-++namespace {
-++
-++class SVMLVariantsEmitter {
-++
-++  RecordKeeper &Records;
-++
-++private:
-++  void emitSVMLVariants(raw_ostream &OS);
-++
-++public:
-++  SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {}
-++
-++  void run(raw_ostream &OS);
-++};
-++} // End anonymous namespace
-++
-++/// \brief Emit the set of SVML variant function names.
-++// The default is to emit the high accuracy SVML variants until a mechanism is
-++// introduced to allow a selection of different variants through precision
-++// requirements specified by the user. This code generates mappings to svml
-++// that are in the scalar form of llvm intrinsics, math library calls, or the
-++// finite variants of math library calls.
-++void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) {
-++
-++  const unsigned MinSinglePrecVL = 4;
-++  const unsigned MaxSinglePrecVL = 16;
-++  const unsigned MinDoublePrecVL = 2;
-++  const unsigned MaxDoublePrecVL = 8;
-++
-++  OS << "#ifdef GET_SVML_VARIANTS\n";
-++
-++  for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) {
-++    StringRef SvmlVariantNameStr = D->getName();
-++    // Single Precision SVML
-++    for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) {
-++      // Emit the scalar math library function to svml function entry.
-++      OS << "{\"" << SvmlVariantNameStr << "f" << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
-++         << "ElementCount::getFixed(" << VL << ")},\n";
-++
-++      // Emit the scalar intrinsic to svml function entry.
-++      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
-++         << "ElementCount::getFixed(" << VL << ")},\n";
-++
-++      // Emit the finite math library function to svml function entry.
-++      OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
-++         << "ElementCount::getFixed(" << VL << ")},\n";
-++    }
-++
-++    // Double Precision SVML
-++    for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) {
-++      // Emit the scalar math library function to svml function entry.
-++      OS << "{\"" << SvmlVariantNameStr << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
-++         << ")},\n";
-++
-++      // Emit the scalar intrinsic to svml function entry.
-++      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
-++         << ")},\n";
-++
-++      // Emit the finite math library function to svml function entry.
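-++      // (Illustration, assuming a hypothetical SvmlVariant record "sin":
-++      // at VL = 2 the three double-precision entries emitted by this loop are
-++      //   {"sin", "__svml_sin2", ElementCount::getFixed(2)},
-++      //   {"llvm.sin.f64", "__svml_sin2", ElementCount::getFixed(2)},
-++      //   {"__sin_finite", "__svml_sin2", ElementCount::getFixed(2)},
-++      // the last of which is produced by the statement below.)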
-++      OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", ";
-++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", "
-++         << "ElementCount::getFixed(" << VL << ")},\n";
-++    }
-++  }
-++
-++  OS << "#endif // GET_SVML_VARIANTS\n\n";
-++}
-++
-++void SVMLVariantsEmitter::run(raw_ostream &OS) {
-++  emitSVMLVariants(OS);
-++}
-++
-++namespace llvm {
-++
-++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) {
-++  SVMLVariantsEmitter(RK).run(OS);
-++}
-++
-++} // End llvm namespace
-+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
-+index 2d4a45f889be6..603d0c223b33a 100644
-+--- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
-++++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
-+@@ -57,6 +57,7 @@ enum ActionType {
-+   GenAutomata,
-+   GenDirectivesEnumDecl,
-+   GenDirectivesEnumImpl,
-++  GenSVMLVariants,
-+ };
-+ 
-+ namespace llvm {
-+@@ -138,7 +139,9 @@ cl::opt<ActionType> Action(
-+     clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl",
-+                "Generate directive related declaration code (header file)"),
-+     clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl",
-+-               "Generate directive related implementation code")));
-++               "Generate directive related implementation code"),
-++    clEnumValN(GenSVMLVariants, "gen-svml",
-++               "Generate SVML variant function names")));
-+ 
-+ cl::OptionCategory PrintEnumsCat("Options for -print-enums");
-+ cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
-+@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
-+   case GenDirectivesEnumImpl:
-+     EmitDirectivesImpl(Records, OS);
-+     break;
-++  case GenSVMLVariants:
-++    EmitSVMLVariants(Records, OS);
-++    break;
-+   }
-+ 
-+   return false;
-+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
-+index 71db8dc77b052..86c3a3068c2dc 100644
-+--- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
-++++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
-+@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
-+ void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
-+ void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS);
-+ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
-++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS);
-+ 
-+ } // End llvm namespace
-+ 
-+diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
-+index 205db16b7d8cd..2572ab5a59e1b 100644
-+--- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
-++++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
-+@@ -104,6 +104,7 @@ syn keyword llvmKeyword
-+   \ inreg
-+   \ intel_ocl_bicc
-+   \ inteldialect
-++  \ intel_svmlcc
-+   \ internal
-+   \ jumptable
-+   \ linkonce
-diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat
-index 1ce228c80..0cba1e937 100644
---- a/conda-recipes/llvmdev/bld.bat
-+++ b/conda-recipes/llvmdev/bld.bat
-@@ -1,3 +1,13 @@
-+setlocal EnableDelayedExpansion
-+FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d
-+if !errorlevel! neq 0 exit /b %errorlevel%)
-+FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d
-+if !errorlevel! neq 0 exit /b %errorlevel%)
-+FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d
-+if !errorlevel! neq 0 exit /b %errorlevel%)
-+
-+DIR
-+
- mkdir build
- cd build
- 
-@@ -24,31 +34,18 @@ REM the 64bit linker anyway. This must be passed in to certain generators as
- REM '-Thost x64'.
- set PreferredToolArchitecture=x64 - --set MAX_INDEX_CMAKE_GENERATOR=2 -- --REM On older generators we can squeete the architecture into the generator --REM name. In newer generators, we must use the -A flag for cmake to hand in the --REM correct architecture. Also, using Visual Studio 16 2019 we use toolset --REM v141, which basically means use a Visual Studio 15 2017 type compiler from --REM Visual Studio 16 2019. See also: --REM https://stackoverflow.com/questions/55708600/whats-the-cmake-generator-for-visual-studio-2019 -+set MAX_INDEX_CMAKE_GENERATOR=0 - --set "CMAKE_GENERATOR[0]=Visual Studio 14 2015%ARCH_POSTFIX%" --set "CMAKE_GENERATOR[1]=Visual Studio 15 2017%ARCH_POSTFIX%" --set "CMAKE_GENERATOR[2]=Visual Studio 16 2019" -+set "CMAKE_GENERATOR[0]=Visual Studio 16 2019" - --set "CMAKE_GENERATOR_ARCHITECTURE[0]=" --set "CMAKE_GENERATOR_ARCHITECTURE[1]=" --set "CMAKE_GENERATOR_ARCHITECTURE[2]=%GEN_ARCH%" -+set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%" - --set "CMAKE_GENERATOR_TOOLSET[0]=host %PreferredToolArchitecture%" --set "CMAKE_GENERATOR_TOOLSET[1]=host %PreferredToolArchitecture%" --set "CMAKE_GENERATOR_TOOLSET[2]=v141" -+set "CMAKE_GENERATOR_TOOLSET[0]=v142" - - REM Reduce build times and package size by removing unused stuff - REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015 - set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^ -- -DLLVM_INCLUDE_TESTS=OFF ^ -+ -DLLVM_ENABLE_PROJECTS:STRING=lld ^ - -DLLVM_INCLUDE_UTILS=ON ^ - -DLLVM_INCLUDE_DOCS=OFF ^ - -DLLVM_INCLUDE_EXAMPLES=OFF ^ -@@ -67,7 +64,7 @@ for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do ( - -DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^ - -DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^ - -DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^ -- %CMAKE_CUSTOM% "%SRC_DIR%" -+ %CMAKE_CUSTOM% "%SRC_DIR%\llvm" - if not errorlevel 1 goto configuration_successful - del CMakeCache.txt - ) -@@ -85,13 +82,3 @@ if errorlevel 1 exit 1 - REM === Install step === - cmake --build . --config "%BUILD_CONFIG%" --target install - if errorlevel 1 exit 1 -- --REM From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 --"%BUILD_CONFIG%\bin\opt" -S -vector-library=SVML -mcpu=haswell -O3 "%RECIPE_DIR%\numba-3016.ll" | "%BUILD_CONFIG%\bin\FileCheck" "%RECIPE_DIR%\numba-3016.ll" --if errorlevel 1 exit 1 -- --REM This is technically how to run the suite, but it will only run in an --REM enhanced unix-like shell which has functions like `grep` available. 
--REM cd ..\test --REM "%PYTHON%" "..\build\%BUILD_CONFIG%\bin\llvm-lit.py" -vv Transforms ExecutionEngine Analysis CodeGen/X86 --REM if errorlevel 1 exit 1 -diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh -index fd99eee90..dc0af4074 100644 ---- a/conda-recipes/llvmdev/build.sh -+++ b/conda-recipes/llvmdev/build.sh -@@ -15,10 +15,14 @@ else - DARWIN_TARGET=x86_64-apple-darwin13.4.0 - fi - -+mv llvm-*.src llvm -+mv lld-*.src lld -+mv unwind/libunwind-*.src libunwind - - declare -a _cmake_config - _cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX}) - _cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release) -+_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld") - # The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here - # _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON) - _cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON) -@@ -27,6 +31,7 @@ _cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON) - _cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF) - # Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to. - _cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF) -+_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF) - # Sometimes these are reported as unused. Whatever. - _cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF) - _cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF) -@@ -39,10 +44,10 @@ _cmake_config+=(-DLLVM_ENABLE_RTTI=OFF) - _cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}) - _cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly) - _cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit -+_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project - # TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to - # disable these unnecessary but useful things. - if [[ ${CONDA_FORGE} == yes ]]; then -- _cmake_config+=(-DLLVM_INCLUDE_TESTS=OFF) - _cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF) - _cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF) - fi -@@ -76,7 +81,7 @@ cd build - - cmake -G'Unix Makefiles' \ - "${_cmake_config[@]}" \ -- .. -+ ../llvm - - ARCH=`uname -m` - if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling -@@ -85,18 +90,7 @@ else - make -j${CPU_COUNT} VERBOSE=1 - fi - -+make check-llvm-unit || exit $? -+ - # From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 - make install || exit $? -- --# SVML tests on x86_64 arch only --if [[ $ARCH == 'x86_64' ]]; then -- bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? 
--fi -- --# run the tests, skip some on linux-32 --cd ../test --if [[ $ARCH == 'i686' ]]; then -- ../build/bin/llvm-lit -vv Transforms Analysis CodeGen/X86 --else -- ../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86 --fi -diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml -index 27b596ffc..e2df508e9 100644 ---- a/conda-recipes/llvmdev/meta.yaml -+++ b/conda-recipes/llvmdev/meta.yaml -@@ -1,8 +1,9 @@ --{% set shortversion = "11.1" %} --{% set version = "11.1.0" %} --{% set sha256_llvm = "ce8508e318a01a63d4e8b3090ab2ded3c598a50258cc49e2625b9120d4c03ea5" %} --{% set sha256_lld = "017a788cbe1ecc4a949abf10755870519086d058a2e99f438829aef24f0c66ce" %} --{% set build_number = "5" %} -+{% set shortversion = "14.0" %} -+{% set version = "14.0.6" %} -+{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %} -+{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %} -+{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %} -+{% set build_number = "0" %} - - package: - name: llvmdev -@@ -13,20 +14,16 @@ source: - fn: llvm-{{ version }}.src.tar.xz - sha256: {{ sha256_llvm }} - patches: -- - ../partial-testing.patch -- # Intel SVML optimizations (two patches) -- - ../intel-D47188-svml-VF.patch -- # Second patch from https://github.com/conda-forge/llvmdev-feedstock/blob/c706309/recipe/patches/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch -- - ../expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch -- # Reverts a patch limiting non-GlobalValue name length -- - ../0001-Revert-Limit-size-of-non-GlobalValue-name.patch -- # Fixes for aarch64 on LLVM 11 from https://reviews.llvm.org/D104123 -- - ../llvm_11_consecutive_registers.patch -- -+ - ../llvm14-remove-use-of-clonefile.patch -+ - ../llvm14-svml.patch - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz - fn: lld-{{ version }}.src.tar.xz - sha256: {{ sha256_lld }} -- folder: tools/lld -+ -+ - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz -+ fn: libunwind-{{ version }}.src.tar.xz -+ sha256: {{ sha256_libunwind }} -+ folder: unwind - - build: - number: {{ build_number }} -@@ -59,8 +56,6 @@ requirements: - - python # [not (armv6l or armv7l or aarch64 or win)] - - test: -- files: -- - numba-3016.ll - commands: - - $PREFIX/bin/llvm-config --libs # [not win] - - $PREFIX/bin/llc -version # [not win] -@@ -81,5 +76,5 @@ about: - home: http://llvm.org/ - dev_url: https://github.com/llvm-mirror/llvm - license: NCSA -- license_file: LICENSE.TXT -+ license_file: llvm/LICENSE.TXT - summary: Development headers and libraries for LLVM -diff --git a/conda-recipes/llvmdev/numba-3016.ll b/conda-recipes/llvmdev/numba-3016.ll -deleted file mode 100644 -index 1a9b3ecf8..000000000 ---- a/conda-recipes/llvmdev/numba-3016.ll -+++ /dev/null -@@ -1,80 +0,0 @@ --; Regression test for llvmdev-feedstock#52 and numba#3016 -- --; Generated from C code: int a[1<<10],b[1<<10]; void foo() { int i=0; for(i=0; i<1<<10; i++) { b[i]=sin(a[i]); }} --; compiled: -fvectorize -fveclib=SVML -O -S -mavx -mllvm -disable-llvm-optzns -emit-llvm -- --; RUN: opt -vector-library=SVML -mcpu=haswell -O3 -S < %s | FileCheck %s --; CHECK: call {{.*}}__svml_sin4_ha( --; CHECK-NOT: call {{.*}}__svml_sin4( --; CHECK-NOT: call {{.*}}__svml_sin8 -- --source_filename = "svml-3016.c" --target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" --target triple = "x86_64-pc-linux-gnu" -- --@a = common dso_local global [1024 x i32] zeroinitializer, align 16 --@b = common dso_local global [1024 x i32] zeroinitializer, align 16 -- --; Function Attrs: nounwind uwtable --define dso_local void @foo() #0 { -- %1 = alloca i32, align 4 -- %2 = bitcast i32* %1 to i8* -- call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3 -- store i32 0, i32* %1, align 4, !tbaa !2 -- store i32 0, i32* %1, align 4, !tbaa !2 -- br label %3 -- --;