diff --git a/llvm.spec b/llvm.spec
index 1769d0ad3de..7d89ae2fc97 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -1,4 +1,4 @@
-### RPM external llvm 12.0.1
+### RPM external llvm 14.0.6
 ## INITENV +PATH LD_LIBRARY_PATH %{i}/lib64
 ## INITENV +PATH PYTHON3PATH %{i}/lib64/python%{cms_python3_major_minor_version}/site-packages
@@ -7,10 +7,10 @@
 Requires: gcc zlib python3
 Requires: cuda
 AutoReq: no
-%define llvmCommit 9f4ab770e61b68d2037cc7cda1f868a8ba52da85
-%define llvmBranch cms/release/12.x/fed4134
-%define iwyuCommit 5db414ac448004fe019871c977905cb7c2cff23f
-%define iwyuBranch clang_11
+%define llvmCommit d88fe2a1cafb6621210a7a0ae968c1e8e797d2bb
+%define llvmBranch cms/release/14.x/f28c006
+%define iwyuCommit d888efc52646dcf3e4e3a56af13aa23dd26abde0
+%define iwyuBranch master
 Source0: git+https://github.com/cms-externals/llvm-project.git?obj=%{llvmBranch}/%{llvmCommit}&export=llvm-%{realversion}-%{llvmCommit}&module=llvm-%{realversion}-%{llvmCommit}&output=/llvm-%{realversion}-%{llvmCommit}.tgz
 Source1: git+https://github.com/include-what-you-use/include-what-you-use.git?obj=%{iwyuBranch}/%{iwyuCommit}&export=iwyu-%{realversion}-%{iwyuCommit}&module=iwyu-%{realversion}-%{iwyuCommit}&output=/iwyu-%{realversion}-%{iwyuCommit}.tgz
diff --git a/pip/llvmlite.file b/pip/llvmlite.file
index 4491e0ac3b2..28d8ef95165 100644
--- a/pip/llvmlite.file
+++ b/pip/llvmlite.file
@@ -1,7 +1,5 @@
 Requires: llvm
-Patch0: py3-llvmlite-fpic-flag
-Patch1: py3-llvmlite-version
-Patch3: py3-llvmlite-removeMethod
+Patch0: py3-llvmlite-14
-%define source0 git+https://github.com/numba/llvmlite?obj=release0.35/v%{realversion}&export=llvmlite-%{realversion}&output=/source.tar.gz
+%define source0 git+https://github.com/numba/llvmlite?obj=main/%{realversion}&export=llvmlite-%{realversion}&output=/source.tar.gz
 %define PipPreBuild export LLVM_CONFIG=${LLVM_ROOT}/bin/llvm-config
diff --git a/pip/requirements.txt b/pip/requirements.txt
index 007c6b4776c..f9e4b706ee2 100644
--- a/pip/requirements.txt
+++ b/pip/requirements.txt
@@ -151,7 +151,7 @@ kiwisolver==1.4.4
 law==0.1.7
 lazy-object-proxy==1.7.1
 lizard==1.17.10
-llvmlite==0.38.1
+llvmlite==778380378bb856b10d4d77f45aa9386f8de4d940
 lockfile==0.12.2
 luigi==3.1.1
 lxml==4.9.1
diff --git a/py3-dxr.spec b/py3-dxr.spec
index b95a6edf1b7..c276e728ba4 100644
--- a/py3-dxr.spec
+++ b/py3-dxr.spec
@@ -2,7 +2,7 @@
 ## INITENV +PATH PYTHON3PATH %i/${PYTHON3_LIB_SITE_PACKAGES}
 Requires: zlib llvm sqlite
 Requires: py3-Jinja2 py3-parsimonious py3-pysqlite3 py3-Pygments
-%define dxrCommit de41946bc5601d100efb44780f11db71dafaeb1e
+%define dxrCommit e79425eded8ca0ae882b4ccceaf27ae1aab446d3
 %define branch cms/6ea764102a/py3
 Source0: git+https://github.com/cms-externals/dxr.git?obj=%{branch}/%{dxrCommit}&export=dxr-%{dxrCommit}&module=dxr-%dxrCommit&output=/dxr-%{dxrCommit}.tgz
diff --git a/py3-llvmlite-14.patch b/py3-llvmlite-14.patch
new file mode 100644
index 00000000000..088c40da230
--- /dev/null
+++ b/py3-llvmlite-14.patch
@@ -0,0 +1,3129 @@
+From c37e824380fec443edb24c914b1767dcff496d38 Mon Sep 17 00:00:00 2001
+From: Andre Masella
+Date: Tue, 5 Apr 2022 15:22:21 -0400
+Subject: [PATCH] Update to LLVM 12-14
+
+Modify llvmlite to support LLVM 11-14 and modify conda recipe to build LLVM14.
+Also lift over all patches to LLVM versions as required.
+---
+ ...-Limit-size-of-non-GlobalValue-name.patch} |    0
+ ...tch => llvm11-consecutive_registers.patch} |    0
+ ...-entrypoints-in-add-TLI-mappings.ll.patch} |    0
+ ...atch => llvm11-intel-D47188-svml-VF.patch} |    0
+ ...o-static.patch => llvm11-lto-static.patch} |    0
+ ...ing.patch => llvm11-partial-testing.patch} |    0
+ ...t-Limit-size-of-non-GlobalValue-name.patch |   49 +
+ .../llvm12-consecutive_registers.patch        |  181 ++
+ conda-recipes/llvm12-lto-static.patch         |   12 +
+ conda-recipes/llvm13-lto-static.patch         |   12 +
+ .../llvm14-remove-use-of-clonefile.patch      |   54 +
+ conda-recipes/llvm14-svml.patch               | 2192 +++++++++++++++++
+ conda-recipes/llvmdev/bld.bat                 |   45 +-
+ conda-recipes/llvmdev/build.sh                |   24 +-
+ conda-recipes/llvmdev/meta.yaml               |   33 +-
+ conda-recipes/llvmdev/numba-3016.ll           |   80 -
+ conda-recipes/llvmlite/bld.bat                |    5 +-
+ conda-recipes/llvmlite/meta.yaml              |   10 +-
+ ffi/Makefile.freebsd                          |    2 +-
+ ffi/Makefile.osx                              |    4 +-
+ ffi/build.py                                  |   15 +-
+ ffi/passmanagers.cpp                          |    9 +-
+ ffi/targets.cpp                               |    8 +
+ ffi/value.cpp                                 |   13 +-
+ llvmlite/binding/passmanagers.py              |    3 +-
+ llvmlite/tests/test_binding.py                |    2 +-
+ 26 files changed, 2583 insertions(+), 170 deletions(-)
+ rename conda-recipes/{0001-Revert-Limit-size-of-non-GlobalValue-name.patch => llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch} (100%)
+ rename conda-recipes/{llvm_11_consecutive_registers.patch => llvm11-consecutive_registers.patch} (100%)
+ rename conda-recipes/{expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch => llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch} (100%)
+ rename conda-recipes/{intel-D47188-svml-VF.patch => llvm11-intel-D47188-svml-VF.patch} (100%)
+ rename conda-recipes/{llvm-lto-static.patch => llvm11-lto-static.patch} (100%)
+ rename conda-recipes/{partial-testing.patch => llvm11-partial-testing.patch} (100%)
+ create mode 100644 conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
+ create mode 100644 conda-recipes/llvm12-consecutive_registers.patch
+ create mode 100644 conda-recipes/llvm12-lto-static.patch
+ create mode 100644 conda-recipes/llvm13-lto-static.patch
+ create mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch
+ create mode 100644 conda-recipes/llvm14-svml.patch
+ delete mode 100644 conda-recipes/llvmdev/numba-3016.ll
+
+diff --git a/conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
+similarity index 100%
+rename from conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch
+rename to conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
+diff --git a/conda-recipes/llvm_11_consecutive_registers.patch b/conda-recipes/llvm11-consecutive_registers.patch
+similarity index 100%
+rename from conda-recipes/llvm_11_consecutive_registers.patch
+rename to conda-recipes/llvm11-consecutive_registers.patch
+diff --git a/conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch b/conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
+similarity index 100%
+rename from conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
+rename to conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
+diff --git a/conda-recipes/intel-D47188-svml-VF.patch b/conda-recipes/llvm11-intel-D47188-svml-VF.patch
+similarity index 100%
+rename from conda-recipes/intel-D47188-svml-VF.patch
+rename to conda-recipes/llvm11-intel-D47188-svml-VF.patch
+diff --git
a/conda-recipes/llvm-lto-static.patch b/conda-recipes/llvm11-lto-static.patch +similarity index 100% +rename from conda-recipes/llvm-lto-static.patch +rename to conda-recipes/llvm11-lto-static.patch +diff --git a/conda-recipes/partial-testing.patch b/conda-recipes/llvm11-partial-testing.patch +similarity index 100% +rename from conda-recipes/partial-testing.patch +rename to conda-recipes/llvm11-partial-testing.patch +diff --git a/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch +new file mode 100644 +index 000000000..9b722d36c +--- /dev/null ++++ b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch +@@ -0,0 +1,49 @@ ++diff -ur a/lib/IR/Value.cpp b/lib/IR/Value.cpp ++--- a/lib/IR/Value.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/IR/Value.cpp 2022-03-31 15:39:31.000000000 -0400 ++@@ -38,10 +38,6 @@ ++ ++ using namespace llvm; ++ ++-static cl::opt NonGlobalValueMaxNameSize( ++- "non-global-value-max-name-size", cl::Hidden, cl::init(1024), ++- cl::desc("Maximum size for the name of non-global values.")); ++- ++ //===----------------------------------------------------------------------===// ++ // Value Class ++ //===----------------------------------------------------------------------===// ++@@ -319,11 +315,6 @@ ++ if (getName() == NameRef) ++ return; ++ ++- // Cap the size of non-GlobalValue names. ++- if (NameRef.size() > NonGlobalValueMaxNameSize && !isa(this)) ++- NameRef = ++- NameRef.substr(0, std::max(1u, (unsigned)NonGlobalValueMaxNameSize)); ++- ++ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!"); ++ ++ // Get the symbol table to update for this object. ++diff -ur a/test/Bitcode/value-with-long-name.ll b/test/Bitcode/value-with-long-name.ll ++deleted file mode 1000644 ++--- a/test/Bitcode/value-with-long-name.ll +++++ /dev/null ++@@ -1,18 +0,0 @@ ++-; Check the size of generated variable when no option is set ++-; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s ++-; CHECK-LONG: %{{[a-z]{4}[a-z]+}} ++- ++-; Then check we correctly cap the size of newly generated non-global values name ++-; Force the size to be small so that the check works on release and debug build ++-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s ++-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s ++-; CHECK-SHORT-NOT: %{{[a-z][a-z]+}} ++- ++-define i32 @f(i32 %a, i32 %b) { ++- %c = add i32 %a, %b ++- %d = add i32 %c, %a ++- %e = add i32 %d, %b ++- ret i32 %e ++-} ++- ++- +diff --git a/conda-recipes/llvm12-consecutive_registers.patch b/conda-recipes/llvm12-consecutive_registers.patch +new file mode 100644 +index 000000000..cc60217bd +--- /dev/null ++++ b/conda-recipes/llvm12-consecutive_registers.patch +@@ -0,0 +1,181 @@ ++diff -ur a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h ++--- a/include/llvm/CodeGen/TargetLowering.h 2021-04-06 12:38:18.000000000 -0400 +++++ b/include/llvm/CodeGen/TargetLowering.h 2022-03-31 15:52:45.000000000 -0400 ++@@ -3975,7 +3975,8 @@ ++ /// must be passed in a block of consecutive registers. 
++ virtual bool ++ functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, ++- bool isVarArg) const { +++ bool isVarArg, +++ const DataLayout &DL) const { ++ return false; ++ } ++ ++diff -ur a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp ++--- a/lib/CodeGen/SelectionDAG/FastISel.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/CodeGen/SelectionDAG/FastISel.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -1087,7 +1087,7 @@ ++ if (Arg.IsByVal) ++ FinalType = cast(Arg.Ty)->getElementType(); ++ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( ++- FinalType, CLI.CallConv, CLI.IsVarArg); +++ FinalType, CLI.CallConv, CLI.IsVarArg, DL); ++ ++ ISD::ArgFlagsTy Flags; ++ if (Arg.IsZExt) ++diff -ur a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp ++--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -1851,7 +1851,7 @@ ++ ++ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( ++ I.getOperand(0)->getType(), F->getCallingConv(), ++- /*IsVarArg*/ false); +++ /*IsVarArg*/ false, DL); ++ ++ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; ++ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, ++@@ -9229,7 +9229,7 @@ ++ CLI.IsTailCall = false; ++ } else { ++ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( ++- CLI.RetTy, CLI.CallConv, CLI.IsVarArg); +++ CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL); ++ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { ++ ISD::ArgFlagsTy Flags; ++ if (NeedsRegBlock) { ++@@ -9289,7 +9289,7 @@ ++ if (Args[i].IsByVal) ++ FinalType = cast(Args[i].Ty)->getElementType(); ++ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( ++- FinalType, CLI.CallConv, CLI.IsVarArg); +++ FinalType, CLI.CallConv, CLI.IsVarArg, DL); ++ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++ ++Value) { ++ EVT VT = ValueVTs[Value]; ++@@ -9830,7 +9830,7 @@ ++ if (Arg.hasAttribute(Attribute::ByVal)) ++ FinalType = Arg.getParamByValType(); ++ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( ++- FinalType, F.getCallingConv(), F.isVarArg()); +++ FinalType, F.getCallingConv(), F.isVarArg(), DL); ++ for (unsigned Value = 0, NumValues = ValueVTs.size(); ++ Value != NumValues; ++Value) { ++ EVT VT = ValueVTs[Value]; ++diff -ur a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp ++--- a/lib/Target/AArch64/AArch64ISelLowering.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/AArch64/AArch64ISelLowering.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -30,6 +30,7 @@ ++ #include "llvm/ADT/Triple.h" ++ #include "llvm/ADT/Twine.h" ++ #include "llvm/Analysis/VectorUtils.h" +++#include "llvm/CodeGen/Analysis.h" ++ #include "llvm/CodeGen/CallingConvLower.h" ++ #include "llvm/CodeGen/MachineBasicBlock.h" ++ #include "llvm/CodeGen/MachineFrameInfo.h" ++@@ -16455,15 +16456,17 @@ ++ } ++ ++ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( ++- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { ++- if (Ty->isArrayTy()) ++- return true; ++- ++- const TypeSize &TySize = Ty->getPrimitiveSizeInBits(); ++- if (TySize.isScalable() && TySize.getKnownMinSize() > 128) ++- return true; +++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, +++ const DataLayout &DL) const { +++ if (!Ty->isArrayTy()) { +++ const 
TypeSize &TySize = Ty->getPrimitiveSizeInBits(); +++ return TySize.isScalable() && TySize.getKnownMinSize() > 128; +++ } ++ ++- return false; +++ // All non aggregate members of the type must have the same type +++ SmallVector ValueVTs; +++ ComputeValueVTs(*this, DL, Ty, ValueVTs); +++ return is_splat(ValueVTs); ++ } ++ ++ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, ++diff -ur a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h ++--- a/lib/Target/AArch64/AArch64ISelLowering.h 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/AArch64/AArch64ISelLowering.h 2022-03-31 15:52:45.000000000 -0400 ++@@ -770,9 +770,10 @@ ++ MachineMemOperand::Flags getTargetMMOFlags( ++ const Instruction &I) const override; ++ ++- bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, ++- CallingConv::ID CallConv, ++- bool isVarArg) const override; +++ bool functionArgumentNeedsConsecutiveRegisters( +++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, +++ const DataLayout &DL) const override; +++ ++ /// Used for exception handling on Win64. ++ bool needsFixedCatchObjects() const override; ++ ++diff -ur a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp ++--- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -259,7 +259,7 @@ ++ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); ++ ++ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( ++- OrigArg.Ty, CallConv, false); +++ OrigArg.Ty, CallConv, false, DL); ++ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { ++ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); ++ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0], ++diff -ur a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp ++--- a/lib/Target/ARM/ARMCallLowering.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/ARM/ARMCallLowering.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -218,7 +218,7 @@ ++ ++ bool NeedsConsecutiveRegisters = ++ TLI.functionArgumentNeedsConsecutiveRegisters( ++- SplitTy, F.getCallingConv(), F.isVarArg()); +++ SplitTy, F.getCallingConv(), F.isVarArg(), DL); ++ if (NeedsConsecutiveRegisters) { ++ Flags.setInConsecutiveRegs(); ++ if (i == e - 1) ++diff -ur a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp ++--- a/lib/Target/ARM/ARMISelLowering.cpp 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/ARM/ARMISelLowering.cpp 2022-03-31 15:52:45.000000000 -0400 ++@@ -19269,7 +19269,8 @@ ++ /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when ++ /// passing according to AAPCS rules. ++ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( ++- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { +++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, +++ const DataLayout &DL) const { ++ if (getEffectiveCallingConv(CallConv, isVarArg) != ++ CallingConv::ARM_AAPCS_VFP) ++ return false; ++diff -ur a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h ++--- a/lib/Target/ARM/ARMISelLowering.h 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/ARM/ARMISelLowering.h 2022-03-31 15:52:45.000000000 -0400 ++@@ -578,7 +578,8 @@ ++ /// Returns true if an argument of type Ty needs to be passed in a ++ /// contiguous block of registers in calling convention CallConv. 
++ bool functionArgumentNeedsConsecutiveRegisters( ++- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; +++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, +++ const DataLayout &DL) const override; ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception address on entry to an EH pad. ++diff -ur a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h ++--- a/lib/Target/PowerPC/PPCISelLowering.h 2021-04-06 12:38:18.000000000 -0400 +++++ b/lib/Target/PowerPC/PPCISelLowering.h 2022-03-31 15:52:45.000000000 -0400 ++@@ -998,7 +998,8 @@ ++ /// Returns true if an argument of type Ty needs to be passed in a ++ /// contiguous block of registers in calling convention CallConv. ++ bool functionArgumentNeedsConsecutiveRegisters( ++- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { +++ Type *Ty, CallingConv::ID CallConv, bool isVarArg, +++ const DataLayout &DL) const override { ++ // We support any array type as "consecutive" block in the parameter ++ // save area. The element type defines the alignment requirement and ++ // whether the argument should go in GPRs, FPRs, or VRs if available. +diff --git a/conda-recipes/llvm12-lto-static.patch b/conda-recipes/llvm12-lto-static.patch +new file mode 100644 +index 000000000..76cc55def +--- /dev/null ++++ b/conda-recipes/llvm12-lto-static.patch +@@ -0,0 +1,12 @@ ++diff -ur a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt ++--- llvm-12.0.0.src-orig/tools/lto/CMakeLists.txt 2021-04-06 12:38:18.000000000 -0400 +++++ llvm-12.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 15:46:00.000000000 -0400 ++@@ -21,7 +21,7 @@ ++ ++ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports) ++ ++-add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS +++add_llvm_library(LTO INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS ++ intrinsics_gen) ++ ++ install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h +diff --git a/conda-recipes/llvm13-lto-static.patch b/conda-recipes/llvm13-lto-static.patch +new file mode 100644 +index 000000000..b8a624250 +--- /dev/null ++++ b/conda-recipes/llvm13-lto-static.patch +@@ -0,0 +1,12 @@ ++diff -ur llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt llvm-13.0.0.src/tools/lto/CMakeLists.txt ++--- llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt 2021-09-24 12:18:10.000000000 -0400 +++++ llvm-13.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 17:07:07.000000000 -0400 ++@@ -25,7 +25,7 @@ ++ set(LTO_LIBRARY_TYPE MODULE) ++ set(LTO_LIBRARY_NAME libLTO) ++ else() ++- set(LTO_LIBRARY_TYPE SHARED) +++ set(LTO_LIBRARY_TYPE STATIC) ++ set(LTO_LIBRARY_NAME LTO) ++ endif() ++ +diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch +new file mode 100644 +index 000000000..6ef9c9d61 +--- /dev/null ++++ b/conda-recipes/llvm14-remove-use-of-clonefile.patch +@@ -0,0 +1,54 @@ ++diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc ++--- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-03-14 05:44:55.000000000 -0400 +++++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-09-19 11:30:59.000000000 -0400 ++@@ -1462,6 +1462,7 @@ ++ std::error_code copy_file(const Twine &From, const Twine &To) { ++ std::string FromS = From.str(); ++ std::string ToS = To.str(); +++ /* ++ #if __has_builtin(__builtin_available) ++ if (__builtin_available(macos 10.12, *)) { ++ // Optimistically try to use clonefile() and handle errors, rather than ++@@ -1490,6 +1491,7 @@ ++ // cheaper. 
++ } ++ #endif +++ */ ++ if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA)) ++ return std::error_code(); ++ return std::error_code(errno, std::generic_category()); ++diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp ++--- a/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-03-14 05:44:55.000000000 -0400 +++++ b/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-09-19 11:33:07.000000000 -0400 ++@@ -2267,15 +2267,15 @@ ++ ++ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError); ++ EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe)); ++- +++#if !defined(__APPLE__) ++ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError); ++ EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe)); ++- +++#endif ++ // Modern BSDs require root to set the sticky bit on files. ++ // AIX and Solaris without root will mask off (i.e., lose) the sticky bit ++ // on files. ++ #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && \ ++- !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) +++ !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__) ++ EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError); ++ EXPECT_TRUE(CheckPermissions(fs::sticky_bit)); ++ ++@@ -2297,10 +2297,12 @@ ++ EXPECT_TRUE(CheckPermissions(fs::all_perms)); ++ #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX ++ +++#if !defined(__APPLE__) ++ EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit), ++ NoError); ++ EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit)); ++ #endif +++#endif ++ } ++ ++ #ifdef _WIN32 +diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch +new file mode 100644 +index 000000000..cdce26b34 +--- /dev/null ++++ b/conda-recipes/llvm14-svml.patch +@@ -0,0 +1,2192 @@ ++From bc2dcd190b7148d04772fa7fcd18b5200b758d4a Mon Sep 17 00:00:00 2001 ++From: Ivan Butygin ++Date: Sun, 24 Jul 2022 20:31:29 +0200 ++Subject: [PATCH] Fixes vectorizer and extends SVML support ++ ++Patch was updated to fix SVML calling convention issues uncovered by llvm 10. ++In previous versions of patch SVML calling convention was selected based on ++compilation settings. So if you try to call 256bit vector function from avx512 ++code function will be called with avx512 cc which is incorrect. To fix this ++SVML cc was separated into 3 different cc for 128, 256 and 512bit vector lengths ++which are selected based on actual input vector length. ++ ++Original patch merged several fixes: ++ ++1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls ++to SVML library as it has non-standard calling conventions. So accordingly it ++has SVML calling conventions definitions and code to set CC to the vectorized ++calls. As SVML provides several implementations for the math functions we also ++took into consideration fast attribute and select more fast implementation in ++such case. This work is based on original Matt Masten's work. ++Author: Denis Nagorny ++ ++2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML ++calls by breaking down the illegal vector call instruction into multiple legal ++vector call instructions during code generation. Currently the vectorizer does ++not check legality of the generated SVML (or any VECLIB) call instructions, and ++this can lead to potential problems even during vector type legalization. 
This ++patch addresses this issue by adding a legality check during code generation and ++replaces the illegal SVML call with corresponding legalized instructions. ++(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html) ++Author: Karthik Senthil ++--- ++ .../include/llvm/Analysis/TargetLibraryInfo.h | 22 +- ++ llvm/include/llvm/AsmParser/LLToken.h | 3 + ++ llvm/include/llvm/IR/CMakeLists.txt | 4 + ++ llvm/include/llvm/IR/CallingConv.h | 5 + ++ llvm/include/llvm/IR/SVML.td | 62 +++ ++ llvm/lib/Analysis/CMakeLists.txt | 1 + ++ llvm/lib/Analysis/TargetLibraryInfo.cpp | 55 +- ++ llvm/lib/AsmParser/LLLexer.cpp | 3 + ++ llvm/lib/AsmParser/LLParser.cpp | 6 + ++ llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 2 +- ++ llvm/lib/IR/AsmWriter.cpp | 3 + ++ llvm/lib/IR/Verifier.cpp | 3 + ++ llvm/lib/Target/X86/X86CallingConv.td | 70 +++ ++ llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- ++ llvm/lib/Target/X86/X86RegisterInfo.cpp | 46 ++ ++ llvm/lib/Target/X86/X86Subtarget.h | 3 + ++ .../Transforms/Utils/InjectTLIMappings.cpp | 2 +- ++ .../Transforms/Vectorize/LoopVectorize.cpp | 269 +++++++++ ++ .../Generic/replace-intrinsics-with-veclib.ll | 4 +- ++ .../LoopVectorize/X86/svml-calls-finite.ll | 24 +- ++ .../LoopVectorize/X86/svml-calls.ll | 108 ++-- ++ .../LoopVectorize/X86/svml-legal-calls.ll | 513 ++++++++++++++++++ ++ .../LoopVectorize/X86/svml-legal-codegen.ll | 61 +++ ++ llvm/test/Transforms/Util/add-TLI-mappings.ll | 18 +- ++ llvm/utils/TableGen/CMakeLists.txt | 1 + ++ llvm/utils/TableGen/SVMLEmitter.cpp | 110 ++++ ++ llvm/utils/TableGen/TableGen.cpp | 8 +- ++ llvm/utils/TableGen/TableGenBackends.h | 1 + ++ llvm/utils/vim/syntax/llvm.vim | 1 + ++ 29 files changed, 1341 insertions(+), 70 deletions(-) ++ create mode 100644 llvm/include/llvm/IR/SVML.td ++ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll ++ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll ++ create mode 100644 llvm/utils/TableGen/SVMLEmitter.cpp ++ ++diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h ++index 17d1e3f770c14..110ff08189867 100644 ++--- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h +++++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h ++@@ -39,6 +39,12 @@ struct VecDesc { ++ NotLibFunc ++ }; ++ +++enum SVMLAccuracy { +++ SVML_DEFAULT, +++ SVML_HA, +++ SVML_EP +++}; +++ ++ /// Implementation of the target library information. ++ /// ++ /// This class constructs tables that hold the target library information and ++@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl { ++ /// Return true if the function F has a vector equivalent with vectorization ++ /// factor VF. ++ bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { ++- return !getVectorizedFunction(F, VF).empty(); +++ return !getVectorizedFunction(F, VF, false).empty(); ++ } ++ ++ /// Return true if the function F has a vector equivalent with any ++@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl { ++ ++ /// Return the name of the equivalent of F, vectorized with factor VF. If no ++ /// such mapping exists, return the empty string. 
++- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const; +++ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const; +++ +++ Optional getVectorizedFunctionCallingConv( +++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const; ++ ++ /// Set to true iff i32 parameters to library functions should have signext ++ /// or zeroext attributes if they correspond to C-level int or unsigned int, ++@@ -326,8 +335,13 @@ class TargetLibraryInfo { ++ bool isFunctionVectorizable(StringRef F) const { ++ return Impl->isFunctionVectorizable(F); ++ } ++- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { ++- return Impl->getVectorizedFunction(F, VF); +++ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const { +++ return Impl->getVectorizedFunction(F, VF, IsFast); +++ } +++ +++ Optional getVectorizedFunctionCallingConv( +++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { +++ return Impl->getVectorizedFunctionCallingConv(F, FTy, DL); ++ } ++ ++ /// Tests if the function is both available and a candidate for optimized code ++diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h ++index 78ebb35e0ea4d..3ffb57db8b18b 100644 ++--- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h +++++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h ++@@ -133,6 +133,9 @@ enum Kind { ++ kw_fastcc, ++ kw_coldcc, ++ kw_intel_ocl_bicc, +++ kw_intel_svmlcc128, +++ kw_intel_svmlcc256, +++ kw_intel_svmlcc512, ++ kw_cfguard_checkcc, ++ kw_x86_stdcallcc, ++ kw_x86_fastcallcc, ++diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt ++index 0498fc269b634..23bb3de41bc1a 100644 ++--- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt +++++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt ++@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) ++ tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) ++ tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) ++ add_public_tablegen_target(intrinsics_gen) +++ +++set(LLVM_TARGET_DEFINITIONS SVML.td) +++tablegen(LLVM SVML.inc -gen-svml) +++add_public_tablegen_target(svml_gen) ++diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h ++index fd28542465225..096eea1a8e19b 100644 ++--- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h +++++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h ++@@ -252,6 +252,11 @@ namespace CallingConv { ++ /// M68k_INTR - Calling convention used for M68k interrupt routines. ++ M68k_INTR = 101, ++ +++ /// Intel_SVML - Calling conventions for Intel Short Math Vector Library +++ Intel_SVML128 = 102, +++ Intel_SVML256 = 103, +++ Intel_SVML512 = 104, +++ ++ /// The highest possible calling convention ID. Must be some 2^k - 1. ++ MaxID = 1023 ++ }; ++diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td ++new file mode 100644 ++index 0000000000000..5af710404c9d9 ++--- /dev/null +++++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td ++@@ -0,0 +1,62 @@ +++//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===// +++// +++// The LLVM Compiler Infrastructure +++// +++// This file is distributed under the University of Illinois Open Source +++// License. See LICENSE.TXT for details. 
+++// +++//===----------------------------------------------------------------------===// +++// +++// This file is used by TableGen to define the different typs of SVML function +++// variants used with -fveclib=SVML. +++// +++//===----------------------------------------------------------------------===// +++ +++class SvmlVariant; +++ +++def sin : SvmlVariant; +++def cos : SvmlVariant; +++def pow : SvmlVariant; +++def exp : SvmlVariant; +++def log : SvmlVariant; +++def acos : SvmlVariant; +++def acosh : SvmlVariant; +++def asin : SvmlVariant; +++def asinh : SvmlVariant; +++def atan2 : SvmlVariant; +++def atan : SvmlVariant; +++def atanh : SvmlVariant; +++def cbrt : SvmlVariant; +++def cdfnorm : SvmlVariant; +++def cdfnorminv : SvmlVariant; +++def cosd : SvmlVariant; +++def cosh : SvmlVariant; +++def erf : SvmlVariant; +++def erfc : SvmlVariant; +++def erfcinv : SvmlVariant; +++def erfinv : SvmlVariant; +++def exp10 : SvmlVariant; +++def exp2 : SvmlVariant; +++def expm1 : SvmlVariant; +++def hypot : SvmlVariant; +++def invsqrt : SvmlVariant; +++def log10 : SvmlVariant; +++def log1p : SvmlVariant; +++def log2 : SvmlVariant; +++def sind : SvmlVariant; +++def sinh : SvmlVariant; +++def sqrt : SvmlVariant; +++def tan : SvmlVariant; +++def tanh : SvmlVariant; +++ +++// TODO: SVML does not currently provide _ha and _ep variants of these fucnctions. +++// We should call the default variant of these functions in all cases instead. +++ +++// def nearbyint : SvmlVariant; +++// def logb : SvmlVariant; +++// def floor : SvmlVariant; +++// def fmod : SvmlVariant; +++// def ceil : SvmlVariant; +++// def trunc : SvmlVariant; +++// def rint : SvmlVariant; +++// def round : SvmlVariant; ++diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt ++index aec84124129f4..98286e166fbe2 100644 ++--- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt +++++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt ++@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis ++ DEPENDS ++ intrinsics_gen ++ ${MLDeps} +++ svml_gen ++ ++ LINK_LIBS ++ ${MLLinkDeps} ++diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp ++index 02923c2c7eb14..83abde28a62a4 100644 ++--- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp +++++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp ++@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) { ++ F->getFunctionType()); ++ } ++ +++static std::string svmlMangle(StringRef FnName, const bool IsFast) { +++ std::string FullName = FnName.str(); +++ return IsFast ? FullName : FullName + "_ha"; +++} +++ ++ /// Initialize the set of available library functions based on the specified ++ /// target triple. This should be carefully written so that a missing target ++ /// triple gets a sane set of defaults. 
++@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( ++ } ++ case SVML: { ++ const VecDesc VecFuncs[] = { ++- #define TLI_DEFINE_SVML_VECFUNCS ++- #include "llvm/Analysis/VecFuncs.def" +++ #define GET_SVML_VARIANTS +++ #include "llvm/IR/SVML.inc" +++ #undef GET_SVML_VARIANTS ++ }; ++ addVectorizableFunctions(VecFuncs); ++ break; ++@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { ++ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; ++ } ++ ++-StringRef ++-TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, ++- const ElementCount &VF) const { +++std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, +++ const ElementCount &VF, +++ bool IsFast) const { +++ bool FromSVML = ClVectorLibrary == SVML; ++ F = sanitizeFunctionName(F); ++ if (F.empty()) ++- return F; +++ return F.str(); ++ std::vector::const_iterator I = ++ llvm::lower_bound(VectorDescs, F, compareWithScalarFnName); ++ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { ++- if (I->VectorizationFactor == VF) ++- return I->VectorFnName; +++ if (I->VectorizationFactor == VF) { +++ if (FromSVML) { +++ return svmlMangle(I->VectorFnName, IsFast); +++ } +++ return I->VectorFnName.str(); +++ } ++ ++I; ++ } ++- return StringRef(); +++ return std::string(); +++} +++ +++static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType) +++{ +++ assert(isa(FType.getReturnType())); +++ auto *VecCallRetType = cast(FType.getReturnType()); +++ auto TypeBitWidth = DL.getTypeSizeInBits(VecCallRetType); +++ if (TypeBitWidth == 128) { +++ return CallingConv::Intel_SVML128; +++ } else if (TypeBitWidth == 256) { +++ return CallingConv::Intel_SVML256; +++ } else if (TypeBitWidth == 512) { +++ return CallingConv::Intel_SVML512; +++ } else { +++ llvm_unreachable("Invalid vector width"); +++ } +++ return 0; // not reachable +++} +++ +++Optional +++TargetLibraryInfoImpl::getVectorizedFunctionCallingConv( +++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { +++ if (F.startswith("__svml")) { +++ return getSVMLCallingConv(DL, FTy); +++ } +++ return {}; ++ } ++ ++ TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F, ++diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp ++index e3bf41c9721b6..4f9dccd4e0724 100644 ++--- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp +++++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp ++@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() { ++ KEYWORD(spir_kernel); ++ KEYWORD(spir_func); ++ KEYWORD(intel_ocl_bicc); +++ KEYWORD(intel_svmlcc128); +++ KEYWORD(intel_svmlcc256); +++ KEYWORD(intel_svmlcc512); ++ KEYWORD(x86_64_sysvcc); ++ KEYWORD(win64cc); ++ KEYWORD(x86_regcallcc); ++diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp ++index 432ec151cf8ae..3bd6ee61024b8 100644 ++--- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp +++++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp ++@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { ++ /// ::= 'ccc' ++ /// ::= 'fastcc' ++ /// ::= 'intel_ocl_bicc' +++/// ::= 'intel_svmlcc128' +++/// ::= 'intel_svmlcc256' +++/// ::= 'intel_svmlcc512' ++ /// ::= 'coldcc' ++ /// ::= 'cfguard_checkcc' ++ /// ::= 'x86_stdcallcc' ++@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { ++ case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; ++ case 
lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; ++ case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; +++ case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break; +++ case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break; +++ case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break; ++ case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; ++ case lltok::kw_win64cc: CC = CallingConv::Win64; break; ++ case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; ++diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp ++index 0ff045fa787e8..175651949ef85 100644 ++--- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp +++++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp ++@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, ++ // and the exact vector width of the call operands in the ++ // TargetLibraryInfo. ++ const std::string TLIName = ++- std::string(TLI.getVectorizedFunction(ScalarName, VF)); +++ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); ++ ++ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" ++ << ScalarName << "` and vector width " << VF << ".\n"); ++diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp ++index 179754e275b03..c4e95752c97e8 100644 ++--- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp +++++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp ++@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { ++ case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; ++ case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break; ++ case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; +++ case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break; +++ case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break; +++ case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break; ++ case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; ++ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; ++ case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; ++diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp ++index 989d01e2e3950..bae7382a36e13 100644 ++--- a/llvm-14.0.6.src/lib/IR/Verifier.cpp +++++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp ++@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) { ++ case CallingConv::Fast: ++ case CallingConv::Cold: ++ case CallingConv::Intel_OCL_BI: +++ case CallingConv::Intel_SVML128: +++ case CallingConv::Intel_SVML256: +++ case CallingConv::Intel_SVML512: ++ case CallingConv::PTX_Kernel: ++ case CallingConv::PTX_Device: ++ Assert(!F.isVarArg(), "Calling convention does not support varargs or " ++diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td ++index 4dd8a6cdd8982..12e65521215e4 100644 ++--- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td +++++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td ++@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[ ++ CCDelegateTo ++ ]>; ++ +++// Intel_SVML return-value convention. 
+++def RetCC_Intel_SVML : CallingConv<[ +++ // Vector types are returned in XMM0,XMM1 +++ CCIfType<[v4f32, v2f64], +++ CCAssignToReg<[XMM0,XMM1]>>, +++ +++ // 256-bit FP vectors +++ CCIfType<[v8f32, v4f64], +++ CCAssignToReg<[YMM0,YMM1]>>, +++ +++ // 512-bit FP vectors +++ CCIfType<[v16f32, v8f64], +++ CCAssignToReg<[ZMM0,ZMM1]>> +++]>; +++ ++ // This is the return-value convention used for the entire X86 backend. ++ let Entry = 1 in ++ def RetCC_X86 : CallingConv<[ ++@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[ ++ // Check if this is the Intel OpenCL built-ins calling convention ++ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, ++ +++ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, +++ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, +++ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, +++ ++ CCIfSubtarget<"is64Bit()", CCDelegateTo>, ++ CCDelegateTo ++ ]>; ++@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[ ++ CCDelegateTo ++ ]>; ++ +++// X86-64 Intel Short Vector Math Library calling convention. +++def CC_Intel_SVML : CallingConv<[ +++ +++ // The SSE vector arguments are passed in XMM registers. +++ CCIfType<[v4f32, v2f64], +++ CCAssignToReg<[XMM0, XMM1, XMM2]>>, +++ +++ // The 256-bit vector arguments are passed in YMM registers. +++ CCIfType<[v8f32, v4f64], +++ CCAssignToReg<[YMM0, YMM1, YMM2]>>, +++ +++ // The 512-bit vector arguments are passed in ZMM registers. +++ CCIfType<[v16f32, v8f64], +++ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>> +++]>; +++ +++def CC_X86_32_Intr : CallingConv<[ +++ CCAssignToStack<4, 4> +++]>; +++ +++def CC_X86_64_Intr : CallingConv<[ +++ CCAssignToStack<8, 8> +++]>; +++ ++ //===----------------------------------------------------------------------===// ++ // X86 Root Argument Calling Conventions ++ //===----------------------------------------------------------------------===// ++@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[ ++ let Entry = 1 in ++ def CC_X86 : CallingConv<[ ++ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, +++ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, +++ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, +++ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, ++ CCIfSubtarget<"is64Bit()", CCDelegateTo>, ++ CCDelegateTo ++ ]>; ++@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, ++ (sequence "R%u", 12, 15))>; ++ def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, ++ (sequence "XMM%u", 8, 15))>; +++ +++// SVML calling convention +++def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>; +++def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML, +++ K4, K5, K6, K7)>; +++ +++def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>; +++ +++def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "XMM%u", 8, 15))>; +++def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "XMM%u", 6, 15))>; +++ +++def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "YMM%u", 8, 15))>; +++def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "YMM%u", 6, 15))>; +++ +++def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "ZMM%u", 16, 31), +++ K4, K5, K6, K7)>; +++def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, +++ (sequence "ZMM%u", 6, 21), +++ K4, K5, K6, K7)>; ++diff --git 
a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp ++index 8bb7e81e19bbd..1780ce3fc6467 100644 ++--- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp +++++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp ++@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { ++ // FIXME: Only some x86_32 calling conventions support AVX512. ++ if (Subtarget.useAVX512Regs() && ++ (is64Bit() || (CallConv == CallingConv::X86_VectorCall || ++- CallConv == CallingConv::Intel_OCL_BI))) +++ CallConv == CallingConv::Intel_OCL_BI || +++ CallConv == CallingConv::Intel_SVML512))) ++ VecVT = MVT::v16f32; ++ else if (Subtarget.hasAVX()) ++ VecVT = MVT::v8f32; ++diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp ++index 130cb61cdde24..9eec3b25ca9f2 100644 ++--- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp +++++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp ++@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, ++ } ++ } ++ +++namespace { +++std::pair getSVMLRegMaskAndSaveList( +++ bool Is64Bit, bool IsWin64, CallingConv::ID CC) { +++ assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512); +++ unsigned Abi = CC - CallingConv::Intel_SVML128 ; // 0 - 128, 1 - 256, 2 - 512 +++ +++ const std::pair Abi64[] = { +++ std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList), +++ std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList), +++ std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList), +++ }; +++ +++ const std::pair AbiWin64[] = { +++ std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList), +++ std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList), +++ std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList), +++ }; +++ +++ const std::pair Abi32[] = { +++ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), +++ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), +++ std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList), +++ }; +++ +++ if (Is64Bit) { +++ if (IsWin64) { +++ return AbiWin64[Abi]; +++ } else { +++ return Abi64[Abi]; +++ } +++ } else { +++ return Abi32[Abi]; +++ } +++} +++} +++ ++ const MCPhysReg * ++ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ assert(MF && "MachineFunction required"); ++@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ return CSR_64_Intel_OCL_BI_SaveList; ++ break; ++ } +++ case CallingConv::Intel_SVML128: +++ case CallingConv::Intel_SVML256: +++ case CallingConv::Intel_SVML512: { +++ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second; +++ } ++ case CallingConv::HHVM: ++ return CSR_64_HHVM_SaveList; ++ case CallingConv::X86_RegCall: ++@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, ++ return CSR_64_Intel_OCL_BI_RegMask; ++ break; ++ } +++ case CallingConv::Intel_SVML128: +++ case CallingConv::Intel_SVML256: +++ case CallingConv::Intel_SVML512: { +++ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first; +++ } ++ case CallingConv::HHVM: ++ return CSR_64_HHVM_RegMask; ++ case CallingConv::X86_RegCall: ++diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h ++index 
5d773f0c57dfb..6bdf5bc6f3fe9 100644 ++--- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h +++++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h ++@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { ++ case CallingConv::X86_ThisCall: ++ case CallingConv::X86_VectorCall: ++ case CallingConv::Intel_OCL_BI: +++ case CallingConv::Intel_SVML128: +++ case CallingConv::Intel_SVML256: +++ case CallingConv::Intel_SVML512: ++ return isTargetWin64(); ++ // This convention allows using the Win64 convention on other targets. ++ case CallingConv::Win64: ++diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp ++index 047bf5569ded3..59897785f156c 100644 ++--- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp +++++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp ++@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { ++ ++ auto AddVariantDecl = [&](const ElementCount &VF) { ++ const std::string TLIName = ++- std::string(TLI.getVectorizedFunction(ScalarName, VF)); +++ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); ++ if (!TLIName.empty()) { ++ std::string MangledName = ++ VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF); ++diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp ++index 46ff0994e04e7..f472af5e1a835 100644 ++--- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp +++++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp ++@@ -712,6 +712,27 @@ class InnerLoopVectorizer { ++ virtual void printDebugTracesAtStart(){}; ++ virtual void printDebugTracesAtEnd(){}; ++ +++ /// Check legality of given SVML call instruction \p VecCall generated for +++ /// scalar call \p Call. If illegal then the appropriate legal instruction +++ /// is returned. +++ Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call); +++ +++ /// Returns the legal VF for a call instruction \p CI using TTI information +++ /// and vector type. +++ ElementCount getLegalVFForCall(CallInst *CI); +++ +++ /// Partially vectorize a given call \p Call by breaking it down into multiple +++ /// calls of \p LegalCall, decided by the variant VF \p LegalVF. +++ Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall, +++ unsigned LegalVF); +++ +++ /// Generate shufflevector instruction for a vector value \p V based on the +++ /// current \p Part and a smaller VF \p LegalVF. +++ Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part); +++ +++ /// Combine partially vectorized calls stored in \p CallResults. +++ Value *combinePartialVecCalls(SmallVectorImpl &CallResults); +++ ++ /// The original loop. 
++ Loop *OrigLoop; ++ ++@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) { ++ return !CInt || CInt->isZero(); ++ } ++ +++static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL, +++ const TargetLibraryInfo &TLI) { +++ Function *VectorF = CI.getCalledFunction(); +++ FunctionType *FTy = VectorF->getFunctionType(); +++ StringRef VFName = VectorF->getName(); +++ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL); +++ if (CC) { +++ CI.setCallingConv(*CC); +++ } +++} +++ ++ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, ++ VPUser &ArgOperands, ++ VPTransformState &State) { ++@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, ++ if (isa(V)) ++ V->copyFastMathFlags(CI); ++ +++ const DataLayout &DL = V->getModule()->getDataLayout(); +++ setVectorFunctionCallingConv(*V, DL, *TLI); +++ +++ // Perform legalization of SVML call instruction only if original call +++ // was not Intrinsic +++ if (!UseVectorIntrinsic && +++ (V->getCalledFunction()->getName()).startswith("__svml")) { +++ // assert((V->getCalledFunction()->getName()).startswith("__svml")); +++ LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump()); +++ auto *LegalV = cast(legalizeSVMLCall(V, CI)); +++ LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: "; +++ LegalV->dump()); +++ State.set(Def, LegalV, Part); +++ addMetadata(LegalV, &I); +++ } else { ++ State.set(Def, V, Part); ++ addMetadata(V, &I); +++ } +++ } +++} +++ +++//===----------------------------------------------------------------------===// +++// Implementation of functions for SVML vector call legalization. +++//===----------------------------------------------------------------------===// +++// +++// Unlike other VECLIBs, SVML needs to be used with target-legal +++// vector types. Otherwise, link failures and/or runtime failures +++// will occur. A motivating example could be - +++// +++// double *a; +++// float *b; +++// #pragma clang loop vectorize_width(8) +++// for(i = 0; i < N; ++i) { +++// a[i] = sin(i); // Legal SVML VF must be 4 or below on AVX +++// b[i] = cosf(i); // VF can be 8 on AVX since 8 floats can fit in YMM +++// } +++// +++// Current implementation of vector code generation in LV is +++// driven based on a single VF (in InnerLoopVectorizer::VF). This +++// inhibits the flexibility of adjusting/choosing different VF +++// for different instructions. +++// +++// Due to this limitation it is much more straightforward to +++// first generate the illegal sin8 (svml_sin8 for SVML vector +++// library) call and then legalize it than trying to avoid +++// generating illegal code from the beginning. +++// +++// A solution for this problem is to check legality of the +++// call instruction right after generating it in vectorizer and +++// if it is illegal we split the call arguments and issue multiple +++// calls to match the legal VF. This is demonstrated currently for +++// the SVML vector library calls (non-intrinsic version only). +++// +++// Future directions and extensions: +++// 1) This legalization example shows us that a good direction +++// for the VPlan framework would be to model the vector call +++// instructions in a way that legal VF for each call is chosen +++// correctly within vectorizer and illegal code generation is +++// avoided. +++// 2) This logic can also be extended to general vector functions +++// i.e. legalization OpenMP decalre simd functions. 
The +++// requirements needed for this will be documented soon. +++ +++Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall, +++ CallInst *Call) { +++ ElementCount LegalVF = getLegalVFForCall(VecCall); +++ +++ assert(LegalVF.getKnownMinValue() > 1 && +++ "Legal VF for SVML call must be greater than 1 to vectorize"); +++ +++ if (LegalVF == VF) +++ return VecCall; +++ else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue()) +++ // TODO: handle case when we are underfilling vectors +++ return VecCall; +++ +++ // Legal VF for this SVML call is smaller than chosen VF, break it down into +++ // smaller call instructions +++ +++ // Convert args, types and return type to match legal VF +++ SmallVector NewTys; +++ SmallVector NewArgs; +++ +++ for (Value *ArgOperand : Call->args()) { +++ Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF); +++ NewTys.push_back(Ty); +++ NewArgs.push_back(UndefValue::get(Ty)); ++ } +++ +++ // Construct legal vector function +++ const VFShape Shape = +++ VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/); +++ Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape); +++ assert(LegalVectorF != nullptr && "Can't create legal vector function."); +++ +++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump()); +++ +++ SmallVector OpBundles; +++ Call->getOperandBundlesAsDefs(OpBundles); +++ auto LegalV = std::unique_ptr(CallInst::Create(LegalVectorF, NewArgs, OpBundles)); +++ +++ if (isa(LegalV)) +++ LegalV->copyFastMathFlags(Call); +++ +++ const DataLayout &DL = VecCall->getModule()->getDataLayout(); +++ // Set SVML calling conventions +++ setVectorFunctionCallingConv(*LegalV, DL, *TLI); +++ +++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump()); +++ +++ Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue()); +++ +++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump()); +++ +++ // Remove the illegal call from Builder +++ VecCall->eraseFromParent(); +++ +++ return LegalizedCall; +++} +++ +++ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) { +++ const DataLayout DL = CI->getModule()->getDataLayout(); +++ FunctionType *CallFT = CI->getFunctionType(); +++ // All functions that need legalization should have a vector return type. +++ // This is true for all SVML functions that are currently supported. +++ assert(isa(CallFT->getReturnType()) && +++ "Return type of call that needs legalization is not a vector."); +++ auto *VecCallRetType = cast(CallFT->getReturnType()); +++ Type *ElemType = VecCallRetType->getElementType(); +++ +++ unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType); +++ unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); +++ unsigned LegalVF = VectorBitWidth / TypeBitWidth; +++ +++ LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n"); +++ LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n"); +++ LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth +++ << "\n"); +++ LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n"); +++ +++ return ElementCount::getFixed(LegalVF); +++} +++ +++// Partial vectorization of a call instruction is achieved by making clones of +++// \p LegalCall and overwriting its argument operands with shufflevector +++// equivalent decided based on \p LegalVF and current Part being filled. 
+++Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call, +++ CallInst *LegalCall, +++ unsigned LegalVF) { +++ unsigned NumParts = VF.getKnownMinValue() / LegalVF; +++ LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n"); +++ SmallVector CallResults; +++ +++ for (unsigned Part = 0; Part < NumParts; ++Part) { +++ auto *ClonedCall = cast(LegalCall->clone()); +++ +++ // Update the arg operand of cloned call to shufflevector +++ for (unsigned i = 0, ie = Call->arg_size(); i != ie; ++i) { +++ auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part); +++ ClonedCall->setArgOperand(i, NewOp); +++ } +++ +++ LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump()); +++ +++ auto *PartialVecCall = Builder.Insert(ClonedCall); +++ CallResults.push_back(PartialVecCall); +++ } +++ +++ return combinePartialVecCalls(CallResults); +++} +++ +++Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF, +++ unsigned Part) { +++ // Example: +++ // Consider the following vector code - +++ // %1 = sitofp <4 x i32> %0 to <4 x double> +++ // %2 = call <4 x double> @__svml_sin4(<4 x double> %1) +++ // +++ // If the LegalVF is 2, we partially vectorize the sin4 call by invoking +++ // generateShuffleValue on the operand %1 +++ // If Part = 1, output value is - +++ // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> +++ // and if Part = 2, output is - +++ // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> +++ +++ assert(isa(V->getType()) && +++ "Cannot generate shuffles for non-vector values."); +++ SmallVector ShuffleMask; +++ Value *Undef = UndefValue::get(V->getType()); +++ +++ unsigned ElemIdx = Part * LegalVF; +++ +++ for (unsigned K = 0; K < LegalVF; K++) +++ ShuffleMask.push_back(static_cast(ElemIdx + K)); +++ +++ auto *ShuffleInst = +++ Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle"); +++ +++ return ShuffleInst; +++} +++ +++// Results of the calls executed by smaller legal call instructions must be +++// combined to match the original VF for later use. This is done by constructing +++// shufflevector instructions in a cumulative fashion. 
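Again for illustration only, a self-contained C++ sketch of the cumulative combining strategy described in the comment above. Partial results are modelled here as vectors of lane indices, and each round concatenates adjacent pairs (the shufflevector with mask 0..NumElems-1 in the real code), doubling the width until a single VF-wide value in the original lane order remains. The concrete numbers (LegalVF = 2, four partial results) are an assumed example, matching the __svml_exp2 splits in the finite-call tests.

// Standalone sketch of combinePartialVecCalls' pairwise merging:
// 4 x <2 x double>  ->  2 x <4 x double>  ->  1 x <8 x double>.
#include <cstdio>
#include <vector>

int main() {
  unsigned LegalVF = 2, NumRegs = 4;
  std::vector<std::vector<unsigned>> Results(NumRegs);
  for (unsigned R = 0; R < NumRegs; ++R)
    for (unsigned K = 0; K < LegalVF; ++K)
      Results[R].push_back(R * LegalVF + K);   // lanes produced by each partial call

  unsigned NumElems = LegalVF * 2;             // width of each combined value
  while (NumRegs > 1) {
    for (unsigned I = 0; I < NumRegs; I += 2) {
      std::vector<unsigned> Combined;          // models shufflevector(Results[I], Results[I+1])
      for (unsigned J = 0; J < NumElems; ++J)
        Combined.push_back(J < NumElems / 2 ? Results[I][J]
                                            : Results[I + 1][J - NumElems / 2]);
      Results.push_back(Combined);
    }
    Results.erase(Results.begin(), Results.begin() + NumRegs);  // drop consumed inputs
    NumElems *= 2;
    NumRegs /= 2;
  }
  for (unsigned Lane : Results.front())
    std::printf("%u ", Lane);                  // prints 0 1 2 3 4 5 6 7
  std::printf("\n");
  return 0;
}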
+++Value *InnerLoopVectorizer::combinePartialVecCalls( +++ SmallVectorImpl &CallResults) { +++ assert(isa(CallResults[0]->getType()) && +++ "Cannot combine calls with non-vector results."); +++ auto *CallType = cast(CallResults[0]->getType()); +++ +++ Value *CombinedShuffle; +++ unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2; +++ unsigned NumRegs = CallResults.size(); +++ +++ assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) && +++ "Number of partial vector calls to combine must be a power of 2 " +++ "(atleast 2^1)"); +++ +++ while (NumRegs > 1) { +++ for (unsigned I = 0; I < NumRegs; I += 2) { +++ SmallVector ShuffleMask; +++ for (unsigned J = 0; J < NumElems; J++) +++ ShuffleMask.push_back(static_cast(J)); +++ +++ CombinedShuffle = Builder.CreateShuffleVector( +++ CallResults[I], CallResults[I + 1], ShuffleMask, "combined"); +++ LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:"; +++ CombinedShuffle->dump()); +++ CallResults.push_back(CombinedShuffle); +++ } +++ +++ SmallVector::iterator Start = CallResults.begin(); +++ SmallVector::iterator End = Start + NumRegs; +++ CallResults.erase(Start, End); +++ +++ NumElems *= 2; +++ NumRegs /= 2; +++ } +++ +++ return CombinedShuffle; ++ } ++ ++ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { ++diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll ++index df8b7c498bd00..63a36549f18fd 100644 ++--- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll +++++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll ++@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" ++ define <4 x double> @exp_v4(<4 x double> %in) { ++ ; SVML-LABEL: define {{[^@]+}}@exp_v4 ++ ; SVML-SAME: (<4 x double> [[IN:%.*]]) { ++-; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]]) +++; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]]) ++ ; SVML-NEXT: ret <4 x double> [[TMP1]] ++ ; ++ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4 ++@@ -37,7 +37,7 @@ declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0 ++ define <4 x float> @exp_f32(<4 x float> %in) { ++ ; SVML-LABEL: define {{[^@]+}}@exp_f32 ++ ; SVML-SAME: (<4 x float> [[IN:%.*]]) { ++-; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]]) +++; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]]) ++ ; SVML-NEXT: ret <4 x float> [[TMP1]] ++ ; ++ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32 ++diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll ++index a6e191c3d6923..d6e2e11106949 100644 ++--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll +++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll ++@@ -39,7 +39,8 @@ for.end: ; preds = %for.body ++ declare double @__exp_finite(double) #0 ++ ++ ; CHECK-LABEL: @exp_f64 ++-; CHECK: <4 x double> @__svml_exp4 +++; CHECK: <2 x double> @__svml_exp2 +++; CHECK: <2 x double> @__svml_exp2 ++ ; CHECK: ret ++ define void @exp_f64(double* nocapture %varray) { ++ entry: ++@@ -99,7 +100,8 @@ for.end: ; preds = %for.body ++ declare double @__log_finite(double) #0 ++ ++ ; CHECK-LABEL: @log_f64 ++-; CHECK: <4 x double> @__svml_log4 +++; CHECK: <2 x double> @__svml_log2 +++; CHECK: <2 x double> @__svml_log2 ++ ; CHECK: ret ++ define void @log_f64(double* 
nocapture %varray) { ++ entry: ++@@ -159,7 +161,8 @@ for.end: ; preds = %for.body ++ declare double @__pow_finite(double, double) #0 ++ ++ ; CHECK-LABEL: @pow_f64 ++-; CHECK: <4 x double> @__svml_pow4 +++; CHECK: <2 x double> @__svml_pow2 +++; CHECK: <2 x double> @__svml_pow2 ++ ; CHECK: ret ++ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { ++ entry: ++@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0 ++ ++ define void @exp2f_finite(float* nocapture %varray) { ++ ; CHECK-LABEL: @exp2f_finite( ++-; CHECK: call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}}) +++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) +++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0 ++ ++ define void @exp2_finite(double* nocapture %varray) { ++ ; CHECK-LABEL: @exp2_finite( ++-; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) +++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) +++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -276,7 +281,8 @@ for.end: ; preds = %for.body ++ declare double @__log2_finite(double) #0 ++ ++ ; CHECK-LABEL: @log2_f64 ++-; CHECK: <4 x double> @__svml_log24 +++; CHECK: <2 x double> @__svml_log22 +++; CHECK: <2 x double> @__svml_log22 ++ ; CHECK: ret ++ define void @log2_f64(double* nocapture %varray) { ++ entry: ++@@ -333,7 +339,8 @@ for.end: ; preds = %for.body ++ declare double @__log10_finite(double) #0 ++ ++ ; CHECK-LABEL: @log10_f64 ++-; CHECK: <4 x double> @__svml_log104 +++; CHECK: <2 x double> @__svml_log102 +++; CHECK: <2 x double> @__svml_log102 ++ ; CHECK: ret ++ define void @log10_f64(double* nocapture %varray) { ++ entry: ++@@ -390,7 +397,8 @@ for.end: ; preds = %for.body ++ declare double @__sqrt_finite(double) #0 ++ ++ ; CHECK-LABEL: @sqrt_f64 ++-; CHECK: <4 x double> @__svml_sqrt4 +++; CHECK: <2 x double> @__svml_sqrt2 +++; CHECK: <2 x double> @__svml_sqrt2 ++ ; CHECK: ret ++ define void @sqrt_f64(double* nocapture %varray) { ++ entry: ++diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll ++index 42c280df6ad02..088bbdcf1aa4a 100644 ++--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll +++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll ++@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0 ++ ++ define void @sin_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @sin_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -71,7 +71,7 @@ for.end: ++ ++ define void @sin_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @sin_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -94,7 +94,7 @@ for.end: ++ ++ define void @sin_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @sin_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x 
double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -117,7 +117,7 @@ for.end: ++ ++ define void @sin_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @sin_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -140,7 +140,7 @@ for.end: ++ ++ define void @cos_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @cos_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -163,7 +163,7 @@ for.end: ++ ++ define void @cos_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @cos_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -186,7 +186,7 @@ for.end: ++ ++ define void @cos_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @cos_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -209,7 +209,7 @@ for.end: ++ ++ define void @cos_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @cos_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -232,7 +232,7 @@ for.end: ++ ++ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { ++ ; CHECK-LABEL: @pow_f64( ++-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) +++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -257,7 +257,7 @@ for.end: ++ ++ define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { ++ ; CHECK-LABEL: @pow_f64_intrinsic( ++-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) +++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -282,7 +282,7 @@ for.end: ++ ++ define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { ++ ; CHECK-LABEL: @pow_f32( ++-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) +++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -307,7 +307,7 @@ for.end: ++ ++ define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { ++ ; CHECK-LABEL: @pow_f32_intrinsic( ++-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) +++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) ++ ; 
CHECK: ret void ++ ; ++ entry: ++@@ -332,7 +332,7 @@ for.end: ++ ++ define void @exp_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @exp_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -355,7 +355,7 @@ for.end: ++ ++ define void @exp_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @exp_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -378,7 +378,7 @@ for.end: ++ ++ define void @exp_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @exp_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -401,7 +401,7 @@ for.end: ++ ++ define void @exp_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @exp_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -424,7 +424,7 @@ for.end: ++ ++ define void @log_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @log_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -447,7 +447,7 @@ for.end: ++ ++ define void @log_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @log_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -470,7 +470,7 @@ for.end: ++ ++ define void @log_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @log_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -493,7 +493,7 @@ for.end: ++ ++ define void @log_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @log_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -516,7 +516,7 @@ for.end: ++ ++ define void @log2_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @log2_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -539,7 +539,7 @@ for.end: ++ ++ define void @log2_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @log2_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -562,7 +562,7 @@ for.end: ++ ++ define void 
@log2_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @log2_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -585,7 +585,7 @@ for.end: ++ ++ define void @log2_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @log2_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -608,7 +608,7 @@ for.end: ++ ++ define void @log10_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @log10_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -631,7 +631,7 @@ for.end: ++ ++ define void @log10_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @log10_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -654,7 +654,7 @@ for.end: ++ ++ define void @log10_f64_intrinsic(double* nocapture %varray) { ++ ; CHECK-LABEL: @log10_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -677,7 +677,7 @@ for.end: ++ ++ define void @log10_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @log10_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -700,7 +700,7 @@ for.end: ++ ++ define void @sqrt_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @sqrt_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -723,7 +723,7 @@ for.end: ++ ++ define void @sqrt_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @sqrt_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -746,7 +746,7 @@ for.end: ++ ++ define void @exp2_f64(double* nocapture %varray) { ++ ; CHECK-LABEL: @exp2_f64( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -769,7 +769,7 @@ for.end: ++ ++ define void @exp2_f32(float* nocapture %varray) { ++ ; CHECK-LABEL: @exp2_f32( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -792,7 +792,7 @@ for.end: ++ ++ define void @exp2_f64_intrinsic(double* 
nocapture %varray) { ++ ; CHECK-LABEL: @exp2_f64_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -815,7 +815,7 @@ for.end: ++ ++ define void @exp2_f32_intrinsic(float* nocapture %varray) { ++ ; CHECK-LABEL: @exp2_f32_intrinsic( ++-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) +++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) ++ ; CHECK: ret void ++ ; ++ entry: ++@@ -836,4 +836,44 @@ for.end: ++ ret void ++ } ++ +++; CHECK-LABEL: @atan2_finite +++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( +++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( +++; CHECK: ret +++ +++declare double @__atan2_finite(double, double) local_unnamed_addr #0 +++ +++define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { +++entry: +++ br label %for.cond1.preheader +++ +++for.cond1.preheader: ; preds = %for.inc7, %entry +++ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] +++ %0 = trunc i64 %indvars.iv19 to i32 +++ %conv = sitofp i32 %0 to double +++ br label %for.body3 +++ +++for.body3: ; preds = %for.body3, %for.cond1.preheader +++ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] +++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +++ %1 = trunc i64 %indvars.iv.next to i32 +++ %conv4 = sitofp i32 %1 to double +++ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) +++ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv +++ store double %call, double* %arrayidx6, align 8 +++ %exitcond = icmp eq i64 %indvars.iv.next, 100 +++ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 +++ +++for.inc7: ; preds = %for.body3 +++ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 +++ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 +++ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader +++ +++for.end9: ; preds = %for.inc7 +++ ret void +++} +++ ++ attributes #0 = { nounwind readnone } +++!5 = distinct !{!5, !6, !7} +++!6 = !{!"llvm.loop.vectorize.width", i32 8} +++!7 = !{!"llvm.loop.vectorize.enable", i1 true} ++diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll ++new file mode 100644 ++index 0000000000000..326c763994343 ++--- /dev/null +++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll ++@@ -0,0 +1,513 @@ +++; Check legalization of SVML calls, including intrinsic versions (like @llvm..). 
+++ +++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s +++ +++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +++target triple = "x86_64-unknown-linux-gnu" +++ +++declare double @sin(double) #0 +++declare float @sinf(float) #0 +++declare double @llvm.sin.f64(double) #0 +++declare float @llvm.sin.f32(float) #0 +++ +++declare double @cos(double) #0 +++declare float @cosf(float) #0 +++declare double @llvm.cos.f64(double) #0 +++declare float @llvm.cos.f32(float) #0 +++ +++declare double @pow(double, double) #0 +++declare float @powf(float, float) #0 +++declare double @llvm.pow.f64(double, double) #0 +++declare float @llvm.pow.f32(float, float) #0 +++ +++declare double @exp(double) #0 +++declare float @expf(float) #0 +++declare double @llvm.exp.f64(double) #0 +++declare float @llvm.exp.f32(float) #0 +++ +++declare double @log(double) #0 +++declare float @logf(float) #0 +++declare double @llvm.log.f64(double) #0 +++declare float @llvm.log.f32(float) #0 +++ +++ +++define void @sin_f64(double* nocapture %varray) { +++; CHECK-LABEL: @sin_f64( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @sin(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @sin_f32(float* nocapture %varray) { +++; CHECK-LABEL: @sin_f32( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @sinf(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @sin_f64_intrinsic(double* nocapture %varray) { +++; CHECK-LABEL: @sin_f64_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @llvm.sin.f64(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @sin_f32_intrinsic(float* 
nocapture %varray) { +++; CHECK-LABEL: @sin_f32_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @llvm.sin.f32(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @cos_f64(double* nocapture %varray) { +++; CHECK-LABEL: @cos_f64( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @cos(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @cos_f32(float* nocapture %varray) { +++; CHECK-LABEL: @cos_f32( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @cosf(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @cos_f64_intrinsic(double* nocapture %varray) { +++; CHECK-LABEL: @cos_f64_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @llvm.cos.f64(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @cos_f32_intrinsic(float* nocapture %varray) { +++; CHECK-LABEL: @cos_f32_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail 
call float @llvm.cos.f32(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { +++; CHECK-LABEL: @pow_f64( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) +++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv +++ %tmp1 = load double, double* %arrayidx, align 4 +++ %tmp2 = tail call double @pow(double %conv, double %tmp1) +++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %tmp2, double* %arrayidx2, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { +++; CHECK-LABEL: @pow_f64_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) +++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv +++ %tmp1 = load double, double* %arrayidx, align 4 +++ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) +++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %tmp2, double* %arrayidx2, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +++; CHECK-LABEL: @pow_f32( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv +++ %tmp1 = load float, float* %arrayidx, align 4 +++ %tmp2 = tail call float @powf(float %conv, float %tmp1) +++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %tmp2, float* %arrayidx2, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { +++; CHECK-LABEL: @pow_f32_intrinsic( +++; CHECK: [[TMP1:%.*]] = 
call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv +++ %tmp1 = load float, float* %arrayidx, align 4 +++ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) +++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %tmp2, float* %arrayidx2, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @exp_f64(double* nocapture %varray) { +++; CHECK-LABEL: @exp_f64( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @exp(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @exp_f32(float* nocapture %varray) { +++; CHECK-LABEL: @exp_f32( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @expf(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @exp_f64_intrinsic(double* nocapture %varray) { +++; CHECK-LABEL: @exp_f64_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @llvm.exp.f64(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @exp_f32_intrinsic(float* nocapture %varray) { +++; CHECK-LABEL: @exp_f32_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = 
trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @llvm.exp.f32(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @log_f64(double* nocapture %varray) { +++; CHECK-LABEL: @log_f64( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @log(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @log_f32(float* nocapture %varray) { +++; CHECK-LABEL: @log_f32( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @logf(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @log_f64_intrinsic(double* nocapture %varray) { +++; CHECK-LABEL: @log_f64_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) +++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to double +++ %call = tail call double @llvm.log.f64(double %conv) +++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv +++ store double %call, double* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: +++ ret void +++} +++ +++define void @log_f32_intrinsic(float* nocapture %varray) { +++; CHECK-LABEL: @log_f32_intrinsic( +++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) +++; CHECK: ret void +++; +++entry: +++ br label %for.body +++ +++for.body: +++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +++ %tmp = trunc i64 %iv to i32 +++ %conv = sitofp i32 %tmp to float +++ %call = tail call float @llvm.log.f32(float %conv) +++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv +++ store float %call, float* %arrayidx, align 4 +++ %iv.next = add nuw nsw i64 %iv, 1 +++ %exitcond = icmp eq i64 %iv.next, 1000 +++ br i1 %exitcond, label %for.end, label %for.body +++ +++for.end: 
+++ ret void +++} +++ +++attributes #0 = { nounwind readnone } +++ ++diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll ++new file mode 100644 ++index 0000000000000..9422653445dc2 ++--- /dev/null +++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll ++@@ -0,0 +1,61 @@ +++; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype. +++; The C code used to generate this test: +++ +++; #include +++; +++; void foo(double *a, int N){ +++; int i; +++; #pragma clang loop vectorize_width(8) +++; for (i=0;i [[I0:%.*]] to <8 x double> +++; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> +++; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]]) +++; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> +++; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]]) +++; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> +++; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8 +++ +++ +++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +++target triple = "x86_64-unknown-linux-gnu" +++ +++; Function Attrs: nounwind uwtable +++define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 { +++entry: +++ %cmp5 = icmp sgt i32 %N, 0 +++ br i1 %cmp5, label %for.body.preheader, label %for.end +++ +++for.body.preheader: ; preds = %entry +++ %wide.trip.count = zext i32 %N to i64 +++ br label %for.body +++ +++for.body: ; preds = %for.body, %for.body.preheader +++ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] +++ %0 = trunc i64 %indvars.iv to i32 +++ %conv = sitofp i32 %0 to double +++ %call = tail call fast double @sin(double %conv) #2 +++ %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv +++ store double %call, double* %arrayidx, align 8, !tbaa !2 +++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +++ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count +++ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6 +++ +++for.end: ; preds = %for.body, %entry +++ ret void +++} +++ +++; Function Attrs: nounwind +++declare dso_local double @sin(double) local_unnamed_addr #1 +++ +++!2 = !{!3, !3, i64 0} +++!3 = !{!"double", !4, i64 0} +++!4 = !{!"omnipotent char", !5, i64 0} +++!5 = !{!"Simple C/C++ TBAA"} +++!6 = distinct !{!6, !7} +++!7 = !{!"llvm.loop.vectorize.width", i32 8} ++diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll ++index e8c83c4d9bd1f..615fdc29176a2 100644 ++--- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll +++++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll ++@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu" ++ ++ ; COMMON-LABEL: @llvm.compiler.used = appending global ++ ; SVML-SAME: [6 x i8*] [ ++-; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*), ++-; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*), ++-; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*), ++-; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*), ++-; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* 
@__svml_log10f8 to i8*), ++-; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*) +++; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*), +++; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*), +++; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8_ha to i8*), +++; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*), +++; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*), +++; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*) ++ ; MASSV-SAME: [2 x i8*] [ ++ ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*), ++ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*) ++@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0 ++ attributes #0 = { nounwind readnone } ++ ++ ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"= ++-; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2), ++-; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4), ++-; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" } +++; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2_ha), +++; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4_ha), +++; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" } ++ ++ ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"= ++ ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" } ++diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt ++index 97df6a55d1b59..199e0285c9e5d 100644 ++--- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt +++++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt ++@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM ++ SearchableTableEmitter.cpp ++ SubtargetEmitter.cpp ++ SubtargetFeatureInfo.cpp +++ SVMLEmitter.cpp ++ TableGen.cpp ++ Types.cpp ++ X86DisassemblerTables.cpp ++diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp ++new file mode 100644 ++index 0000000000000..a5aeea48db28b ++--- /dev/null +++++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp ++@@ -0,0 +1,110 @@ +++//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===// +++// +++// The LLVM Compiler Infrastructure +++// +++// This file is distributed under the University of Illinois Open Source +++// License. See LICENSE.TXT for details. +++// +++//===----------------------------------------------------------------------===// +++// +++// This tablegen backend emits the scalar to svml function map for TLI. +++// +++//===----------------------------------------------------------------------===// +++ +++#include "CodeGenTarget.h" +++#include "llvm/Support/Format.h" +++#include "llvm/TableGen/Error.h" +++#include "llvm/TableGen/Record.h" +++#include "llvm/TableGen/TableGenBackend.h" +++#include +++#include +++ +++using namespace llvm; +++ +++#define DEBUG_TYPE "SVMLVariants" +++#include "llvm/Support/Debug.h" +++ +++namespace { +++ +++class SVMLVariantsEmitter { +++ +++ RecordKeeper &Records; +++ +++private: +++ void emitSVMLVariants(raw_ostream &OS); +++ +++public: +++ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {} +++ +++ void run(raw_ostream &OS); +++}; +++} // End anonymous namespace +++ +++/// \brief Emit the set of SVML variant function names. +++// The default is to emit the high accuracy SVML variants until a mechanism is +++// introduced to allow a selection of different variants through precision +++// requirements specified by the user. 
This code generates mappings to svml +++// that are in the scalar form of llvm intrinsics, math library calls, or the +++// finite variants of math library calls. +++void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) { +++ +++ const unsigned MinSinglePrecVL = 4; +++ const unsigned MaxSinglePrecVL = 16; +++ const unsigned MinDoublePrecVL = 2; +++ const unsigned MaxDoublePrecVL = 8; +++ +++ OS << "#ifdef GET_SVML_VARIANTS\n"; +++ +++ for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) { +++ StringRef SvmlVariantNameStr = D->getName(); +++ // Single Precision SVML +++ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) { +++ // Emit the scalar math library function to svml function entry. +++ OS << "{\"" << SvmlVariantNameStr << "f" << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " +++ << "ElementCount::getFixed(" << VL << ")},\n"; +++ +++ // Emit the scalar intrinsic to svml function entry. +++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " +++ << "ElementCount::getFixed(" << VL << ")},\n"; +++ +++ // Emit the finite math library function to svml function entry. +++ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " +++ << "ElementCount::getFixed(" << VL << ")},\n"; +++ } +++ +++ // Double Precision SVML +++ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) { +++ // Emit the scalar math library function to svml function entry. +++ OS << "{\"" << SvmlVariantNameStr << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL +++ << ")},\n"; +++ +++ // Emit the scalar intrinsic to svml function entry. +++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL +++ << ")},\n"; +++ +++ // Emit the finite math library function to svml function entry. 
+++ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", "; +++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " +++ << "ElementCount::getFixed(" << VL << ")},\n"; +++ } +++ } +++ +++ OS << "#endif // GET_SVML_VARIANTS\n\n"; +++} +++ +++void SVMLVariantsEmitter::run(raw_ostream &OS) { +++ emitSVMLVariants(OS); +++} +++ +++namespace llvm { +++ +++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) { +++ SVMLVariantsEmitter(RK).run(OS); +++} +++ +++} // End llvm namespace ++diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp ++index 2d4a45f889be6..603d0c223b33a 100644 ++--- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp +++++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp ++@@ -57,6 +57,7 @@ enum ActionType { ++ GenAutomata, ++ GenDirectivesEnumDecl, ++ GenDirectivesEnumImpl, +++ GenSVMLVariants, ++ }; ++ ++ namespace llvm { ++@@ -138,7 +139,9 @@ cl::opt Action( ++ clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", ++ "Generate directive related declaration code (header file)"), ++ clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", ++- "Generate directive related implementation code"))); +++ "Generate directive related implementation code"), +++ clEnumValN(GenSVMLVariants, "gen-svml", +++ "Generate SVML variant function names"))); ++ ++ cl::OptionCategory PrintEnumsCat("Options for -print-enums"); ++ cl::opt Class("class", cl::desc("Print Enum list for this class"), ++@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { ++ case GenDirectivesEnumImpl: ++ EmitDirectivesImpl(Records, OS); ++ break; +++ case GenSVMLVariants: +++ EmitSVMLVariants(Records, OS); +++ break; ++ } ++ ++ return false; ++diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h ++index 71db8dc77b052..86c3a3068c2dc 100644 ++--- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h +++++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h ++@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); ++ void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); ++ void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); ++ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); +++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS); ++ ++ } // End llvm namespace ++ ++diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim ++index 205db16b7d8cd..2572ab5a59e1b 100644 ++--- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim +++++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim ++@@ -104,6 +104,7 @@ syn keyword llvmKeyword ++ \ inreg ++ \ intel_ocl_bicc ++ \ inteldialect +++ \ intel_svmlcc ++ \ internal ++ \ jumptable ++ \ linkonce +diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat +index 1ce228c80..0cba1e937 100644 +--- a/conda-recipes/llvmdev/bld.bat ++++ b/conda-recipes/llvmdev/bld.bat +@@ -1,3 +1,13 @@ ++setlocal EnableDelayedExpansion ++FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d ++if !errorlevel! neq 0 exit /b %errorlevel%) ++FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d ++if !errorlevel! neq 0 exit /b %errorlevel%) ++FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d ++if !errorlevel! neq 0 exit /b %errorlevel%) ++ ++DIR ++ + mkdir build + cd build + +@@ -24,31 +34,18 @@ REM the 64bit linker anyway. This must be passed in to certain generators as + REM '-Thost x64'. 
+ set PreferredToolArchitecture=x64 + +-set MAX_INDEX_CMAKE_GENERATOR=2 +- +-REM On older generators we can squeete the architecture into the generator +-REM name. In newer generators, we must use the -A flag for cmake to hand in the +-REM correct architecture. Also, using Visual Studio 16 2019 we use toolset +-REM v141, which basically means use a Visual Studio 15 2017 type compiler from +-REM Visual Studio 16 2019. See also: +-REM https://stackoverflow.com/questions/55708600/whats-the-cmake-generator-for-visual-studio-2019 ++set MAX_INDEX_CMAKE_GENERATOR=0 + +-set "CMAKE_GENERATOR[0]=Visual Studio 14 2015%ARCH_POSTFIX%" +-set "CMAKE_GENERATOR[1]=Visual Studio 15 2017%ARCH_POSTFIX%" +-set "CMAKE_GENERATOR[2]=Visual Studio 16 2019" ++set "CMAKE_GENERATOR[0]=Visual Studio 16 2019" + +-set "CMAKE_GENERATOR_ARCHITECTURE[0]=" +-set "CMAKE_GENERATOR_ARCHITECTURE[1]=" +-set "CMAKE_GENERATOR_ARCHITECTURE[2]=%GEN_ARCH%" ++set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%" + +-set "CMAKE_GENERATOR_TOOLSET[0]=host %PreferredToolArchitecture%" +-set "CMAKE_GENERATOR_TOOLSET[1]=host %PreferredToolArchitecture%" +-set "CMAKE_GENERATOR_TOOLSET[2]=v141" ++set "CMAKE_GENERATOR_TOOLSET[0]=v142" + + REM Reduce build times and package size by removing unused stuff + REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015 + set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^ +- -DLLVM_INCLUDE_TESTS=OFF ^ ++ -DLLVM_ENABLE_PROJECTS:STRING=lld ^ + -DLLVM_INCLUDE_UTILS=ON ^ + -DLLVM_INCLUDE_DOCS=OFF ^ + -DLLVM_INCLUDE_EXAMPLES=OFF ^ +@@ -67,7 +64,7 @@ for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do ( + -DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^ + -DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^ + -DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^ +- %CMAKE_CUSTOM% "%SRC_DIR%" ++ %CMAKE_CUSTOM% "%SRC_DIR%\llvm" + if not errorlevel 1 goto configuration_successful + del CMakeCache.txt + ) +@@ -85,13 +82,3 @@ if errorlevel 1 exit 1 + REM === Install step === + cmake --build . --config "%BUILD_CONFIG%" --target install + if errorlevel 1 exit 1 +- +-REM From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 +-"%BUILD_CONFIG%\bin\opt" -S -vector-library=SVML -mcpu=haswell -O3 "%RECIPE_DIR%\numba-3016.ll" | "%BUILD_CONFIG%\bin\FileCheck" "%RECIPE_DIR%\numba-3016.ll" +-if errorlevel 1 exit 1 +- +-REM This is technically how to run the suite, but it will only run in an +-REM enhanced unix-like shell which has functions like `grep` available. 
+-REM cd ..\test +-REM "%PYTHON%" "..\build\%BUILD_CONFIG%\bin\llvm-lit.py" -vv Transforms ExecutionEngine Analysis CodeGen/X86 +-REM if errorlevel 1 exit 1 +diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh +index fd99eee90..dc0af4074 100644 +--- a/conda-recipes/llvmdev/build.sh ++++ b/conda-recipes/llvmdev/build.sh +@@ -15,10 +15,14 @@ else + DARWIN_TARGET=x86_64-apple-darwin13.4.0 + fi + ++mv llvm-*.src llvm ++mv lld-*.src lld ++mv unwind/libunwind-*.src libunwind + + declare -a _cmake_config + _cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX}) + _cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release) ++_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld") + # The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here + # _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON) + _cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON) +@@ -27,6 +31,7 @@ _cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON) + _cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF) + # Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to. + _cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF) ++_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF) + # Sometimes these are reported as unused. Whatever. + _cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF) + _cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF) +@@ -39,10 +44,10 @@ _cmake_config+=(-DLLVM_ENABLE_RTTI=OFF) + _cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}) + _cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly) + _cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit ++_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project + # TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to + # disable these unnecessary but useful things. + if [[ ${CONDA_FORGE} == yes ]]; then +- _cmake_config+=(-DLLVM_INCLUDE_TESTS=OFF) + _cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF) + _cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF) + fi +@@ -76,7 +81,7 @@ cd build + + cmake -G'Unix Makefiles' \ + "${_cmake_config[@]}" \ +- .. ++ ../llvm + + ARCH=`uname -m` + if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling +@@ -85,18 +90,7 @@ else + make -j${CPU_COUNT} VERBOSE=1 + fi + ++make check-llvm-unit || exit $? ++ + # From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 + make install || exit $? +- +-# SVML tests on x86_64 arch only +-if [[ $ARCH == 'x86_64' ]]; then +- bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? 
+diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml
+index 27b596ffc..e2df508e9 100644
+--- a/conda-recipes/llvmdev/meta.yaml
++++ b/conda-recipes/llvmdev/meta.yaml
+@@ -1,8 +1,9 @@
+-{% set shortversion = "11.1" %}
+-{% set version = "11.1.0" %}
+-{% set sha256_llvm = "ce8508e318a01a63d4e8b3090ab2ded3c598a50258cc49e2625b9120d4c03ea5" %}
+-{% set sha256_lld = "017a788cbe1ecc4a949abf10755870519086d058a2e99f438829aef24f0c66ce" %}
+-{% set build_number = "5" %}
++{% set shortversion = "14.0" %}
++{% set version = "14.0.6" %}
++{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %}
++{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %}
++{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %}
++{% set build_number = "0" %}
+
+ package:
+   name: llvmdev
+@@ -13,20 +14,16 @@ source:
+     fn: llvm-{{ version }}.src.tar.xz
+     sha256: {{ sha256_llvm }}
+     patches:
+-      - ../partial-testing.patch
+-      # Intel SVML optimizations (two patches)
+-      - ../intel-D47188-svml-VF.patch
+-      # Second patch from https://github.com/conda-forge/llvmdev-feedstock/blob/c706309/recipe/patches/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
+-      - ../expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
+-      # Reverts a patch limiting non-GlobalValue name length
+-      - ../0001-Revert-Limit-size-of-non-GlobalValue-name.patch
+-      # Fixes for aarch64 on LLVM 11 from https://reviews.llvm.org/D104123
+-      - ../llvm_11_consecutive_registers.patch
+-
++      - ../llvm14-remove-use-of-clonefile.patch
++      - ../llvm14-svml.patch
+   - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz
+     fn: lld-{{ version }}.src.tar.xz
+     sha256: {{ sha256_lld }}
+-    folder: tools/lld
++
++  - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz
++    fn: libunwind-{{ version }}.src.tar.xz
++    sha256: {{ sha256_libunwind }}
++    folder: unwind
+
+ build:
+   number: {{ build_number }}
+@@ -59,8 +56,6 @@ requirements:
+     - python # [not (armv6l or armv7l or aarch64 or win)]
+
+ test:
+-  files:
+-    - numba-3016.ll
+   commands:
+     - $PREFIX/bin/llvm-config --libs # [not win]
+     - $PREFIX/bin/llc -version # [not win]
+@@ -81,5 +76,5 @@ about:
+   home: http://llvm.org/
+   dev_url: https://github.com/llvm-mirror/llvm
+   license: NCSA
+-  license_file: LICENSE.TXT
++  license_file: llvm/LICENSE.TXT
+   summary: Development headers and libraries for LLVM
+diff --git a/conda-recipes/llvmdev/numba-3016.ll b/conda-recipes/llvmdev/numba-3016.ll
+deleted file mode 100644
+index 1a9b3ecf8..000000000
+--- a/conda-recipes/llvmdev/numba-3016.ll
++++ /dev/null
+@@ -1,80 +0,0 @@
+-; Regression test for llvmdev-feedstock#52 and numba#3016
+-
+-; Generated from C code: int a[1<<10],b[1<<10]; void foo() { int i=0; for(i=0; i<1<<10; i++) { b[i]=sin(a[i]); }}
+-; compiled: -fvectorize -fveclib=SVML -O -S -mavx -mllvm -disable-llvm-optzns -emit-llvm
+-
+-; RUN: opt -vector-library=SVML -mcpu=haswell -O3 -S < %s | FileCheck %s
+-; CHECK: call {{.*}}__svml_sin4_ha(
+-; CHECK-NOT: call {{.*}}__svml_sin4(
+-; CHECK-NOT: call {{.*}}__svml_sin8
+-
+-source_filename = "svml-3016.c"
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+-target triple = "x86_64-pc-linux-gnu"
+-
+-@a = common dso_local global [1024 x i32] zeroinitializer, align 16
+-@b = common dso_local global [1024 x i32] zeroinitializer, align 16
+-
+-; Function Attrs: nounwind uwtable
+-define dso_local void @foo() #0 {
+-  %1 = alloca i32, align 4
+-  %2 = bitcast i32* %1 to i8*
+-  call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3
+-  store i32 0, i32* %1, align 4, !tbaa !2
+-  store i32 0, i32* %1, align 4, !tbaa !2
+-  br label %3
+-
+-;