MINGW-packages/mingw-w64-python-llvmlite/llvm-14.patch

From d143023557a117d4cad4b4785ac3e4bb36264e3e Mon Sep 17 00:00:00 2001
From: Andre Masella <andre@masella.name>
Date: Tue, 5 Apr 2022 15:22:21 -0400
Subject: [PATCH] Update to LLVM 12-14

Modify llvmlite to support LLVM 11-14 and modify conda recipe to build LLVM14.
Also lift over all patches to LLVM versions as required.
---
 ...-Limit-size-of-non-GlobalValue-name.patch} |    0
 ...tch => llvm11-consecutive_registers.patch} |    0
 ...-entrypoints-in-add-TLI-mappings.ll.patch} |    0
 ...atch => llvm11-intel-D47188-svml-VF.patch} |    0
 ...o-static.patch => llvm11-lto-static.patch} |    0
 ...ing.patch => llvm11-partial-testing.patch} |    0
 ...t-Limit-size-of-non-GlobalValue-name.patch |   49 +
 .../llvm12-consecutive_registers.patch        |  181 ++
 conda-recipes/llvm12-lto-static.patch         |   12 +
 conda-recipes/llvm13-lto-static.patch         |   12 +
 .../llvm14-remove-use-of-clonefile.patch      |   54 +
 conda-recipes/llvm14-svml.patch               | 2192 +++++++++++++++++
 conda-recipes/llvmdev/bld.bat                 |   35 +-
 conda-recipes/llvmdev/build.sh                |   18 +-
 conda-recipes/llvmdev/meta.yaml               |   31 +-
 conda-recipes/llvmlite/bld.bat                |    5 +-
 conda-recipes/llvmlite/meta.yaml              |   10 +-
 ffi/Makefile.freebsd                          |    2 +-
 ffi/Makefile.osx                              |    4 +-
 ffi/build.py                                  |   15 +-
 ffi/passmanagers.cpp                          |    9 +-
 ffi/targets.cpp                               |    8 +
 ffi/value.cpp                                 |   13 +-
 llvmlite/binding/passmanagers.py              |    3 +-
 llvmlite/tests/test_binding.py                |    2 +-
 25 files changed, 2583 insertions(+), 72 deletions(-)
 rename conda-recipes/{0001-Revert-Limit-size-of-non-GlobalValue-name.patch => llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch} (100%)
 rename conda-recipes/{llvm_11_consecutive_registers.patch => llvm11-consecutive_registers.patch} (100%)
 rename conda-recipes/{expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch => llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch} (100%)
 rename conda-recipes/{intel-D47188-svml-VF.patch => llvm11-intel-D47188-svml-VF.patch} (100%)
 rename conda-recipes/{llvm-lto-static.patch => llvm11-lto-static.patch} (100%)
 rename conda-recipes/{partial-testing.patch => llvm11-partial-testing.patch} (100%)
 create mode 100644 conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
 create mode 100644 conda-recipes/llvm12-consecutive_registers.patch
 create mode 100644 conda-recipes/llvm12-lto-static.patch
 create mode 100644 conda-recipes/llvm13-lto-static.patch
 create mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch
 create mode 100644 conda-recipes/llvm14-svml.patch

diff --git a/conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
similarity index 100%
rename from conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch
rename to conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
diff --git a/conda-recipes/llvm_11_consecutive_registers.patch b/conda-recipes/llvm11-consecutive_registers.patch
similarity index 100%
rename from conda-recipes/llvm_11_consecutive_registers.patch
rename to conda-recipes/llvm11-consecutive_registers.patch
diff --git a/conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch b/conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
similarity index 100%
rename from conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
rename to conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
diff --git a/conda-recipes/intel-D47188-svml-VF.patch b/conda-recipes/llvm11-intel-D47188-svml-VF.patch
similarity index 100%
rename from conda-recipes/intel-D47188-svml-VF.patch
rename to conda-recipes/llvm11-intel-D47188-svml-VF.patch
diff --git a/conda-recipes/llvm-lto-static.patch b/conda-recipes/llvm11-lto-static.patch
similarity index 100%
rename from conda-recipes/llvm-lto-static.patch
rename to conda-recipes/llvm11-lto-static.patch
diff --git a/conda-recipes/partial-testing.patch b/conda-recipes/llvm11-partial-testing.patch
similarity index 100%
rename from conda-recipes/partial-testing.patch
rename to conda-recipes/llvm11-partial-testing.patch
diff --git a/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
new file mode 100644
index 000000000..9b722d36c
--- /dev/null
+++ b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
@@ -0,0 +1,49 @@
+diff -ur a/lib/IR/Value.cpp b/lib/IR/Value.cpp
+--- a/lib/IR/Value.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/IR/Value.cpp	2022-03-31 15:39:31.000000000 -0400
+@@ -38,10 +38,6 @@
+
+ using namespace llvm;
+
+-static cl::opt<unsigned> NonGlobalValueMaxNameSize(
+-    "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
+-    cl::desc("Maximum size for the name of non-global values."));
+-
+ //===----------------------------------------------------------------------===//
+ //                                Value Class
+ //===----------------------------------------------------------------------===//
+@@ -319,11 +315,6 @@
+   if (getName() == NameRef)
+     return;
+
+-  // Cap the size of non-GlobalValue names.
+-  if (NameRef.size() > NonGlobalValueMaxNameSize && !isa<GlobalValue>(this))
+-    NameRef =
+-        NameRef.substr(0, std::max(1u, (unsigned)NonGlobalValueMaxNameSize));
+-
+   assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
+
+   // Get the symbol table to update for this object.
+diff -ur a/test/Bitcode/value-with-long-name.ll b/test/Bitcode/value-with-long-name.ll
+deleted file mode 100644
+--- a/test/Bitcode/value-with-long-name.ll
++++ /dev/null
+@@ -1,18 +0,0 @@
+-; Check the size of generated variable when no option is set
+-; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s
+-; CHECK-LONG: %{{[a-z]{4}[a-z]+}}
+-
+-; Then check we correctly cap the size of newly generated non-global values name
+-; Force the size to be small so that the check works on release and debug build
+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s
+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s
+-; CHECK-SHORT-NOT: %{{[a-z][a-z]+}}
+-
+-define i32 @f(i32 %a, i32 %b) {
+-  %c = add i32 %a, %b
+-  %d = add i32 %c, %a
+-  %e = add i32 %d, %b
+-  ret i32 %e
+-}
+-
+-
diff --git a/conda-recipes/llvm12-consecutive_registers.patch b/conda-recipes/llvm12-consecutive_registers.patch
new file mode 100644
index 000000000..cc60217bd
--- /dev/null
+++ b/conda-recipes/llvm12-consecutive_registers.patch
@@ -0,0 +1,181 @@
+diff -ur a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
+--- a/include/llvm/CodeGen/TargetLowering.h	2021-04-06 12:38:18.000000000 -0400
++++ b/include/llvm/CodeGen/TargetLowering.h	2022-03-31 15:52:45.000000000 -0400
+@@ -3975,7 +3975,8 @@
+   /// must be passed in a block of consecutive registers.
+   virtual bool
+   functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
+-                                            bool isVarArg) const {
++                                            bool isVarArg,
++                                            const DataLayout &DL) const {
+     return false;
+   }
+
+diff -ur a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
+--- a/lib/CodeGen/SelectionDAG/FastISel.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/CodeGen/SelectionDAG/FastISel.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -1087,7 +1087,7 @@
+     if (Arg.IsByVal)
+       FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+     bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+-        FinalType, CLI.CallConv, CLI.IsVarArg);
++        FinalType, CLI.CallConv, CLI.IsVarArg, DL);
+
+     ISD::ArgFlagsTy Flags;
+     if (Arg.IsZExt)
+diff -ur a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -1851,7 +1851,7 @@
+
+       bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+           I.getOperand(0)->getType(), F->getCallingConv(),
+-          /*IsVarArg*/ false);
++          /*IsVarArg*/ false, DL);
+
+       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+       if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
+@@ -9229,7 +9229,7 @@
+     CLI.IsTailCall = false;
+   } else {
+     bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+-        CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
++        CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
+     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+       ISD::ArgFlagsTy Flags;
+       if (NeedsRegBlock) {
+@@ -9289,7 +9289,7 @@
+     if (Args[i].IsByVal)
+       FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+     bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+-        FinalType, CLI.CallConv, CLI.IsVarArg);
++        FinalType, CLI.CallConv, CLI.IsVarArg, DL);
+     for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+          ++Value) {
+       EVT VT = ValueVTs[Value];
+@@ -9830,7 +9830,7 @@
+     if (Arg.hasAttribute(Attribute::ByVal))
+       FinalType = Arg.getParamByValType();
+     bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+-        FinalType, F.getCallingConv(), F.isVarArg());
++        FinalType, F.getCallingConv(), F.isVarArg(), DL);
+     for (unsigned Value = 0, NumValues = ValueVTs.size();
+          Value != NumValues; ++Value) {
+       EVT VT = ValueVTs[Value];
+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
+--- a/lib/Target/AArch64/AArch64ISelLowering.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/AArch64/AArch64ISelLowering.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -30,6 +30,7 @@
+ #include "llvm/ADT/Triple.h"
+ #include "llvm/ADT/Twine.h"
+ #include "llvm/Analysis/VectorUtils.h"
++#include "llvm/CodeGen/Analysis.h"
+ #include "llvm/CodeGen/CallingConvLower.h"
+ #include "llvm/CodeGen/MachineBasicBlock.h"
+ #include "llvm/CodeGen/MachineFrameInfo.h"
+@@ -16455,15 +16456,17 @@
+ }
+
+ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+-    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+-  if (Ty->isArrayTy())
+-    return true;
+-
+-  const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
+-  if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
+-    return true;
++    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
++    const DataLayout &DL) const {
++  if (!Ty->isArrayTy()) {
++    const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
++    return TySize.isScalable() && TySize.getKnownMinSize() > 128;
++  }
+
+-  return false;
++  // All non aggregate members of the type must have the same type
++  SmallVector<EVT, 0> ValueVTs;
++  ComputeValueVTs(*this, DL, Ty, ValueVTs);
++  return is_splat(ValueVTs);
+ }
+
+ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
+--- a/lib/Target/AArch64/AArch64ISelLowering.h	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/AArch64/AArch64ISelLowering.h	2022-03-31 15:52:45.000000000 -0400
+@@ -770,9 +770,10 @@
+   MachineMemOperand::Flags getTargetMMOFlags(
+     const Instruction &I) const override;
+
+-  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+-                                                 CallingConv::ID CallConv,
+-                                                 bool isVarArg) const override;
++  bool functionArgumentNeedsConsecutiveRegisters(
++      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
++      const DataLayout &DL) const override;
++
+   /// Used for exception handling on Win64.
+   bool needsFixedCatchObjects() const override;
+
+diff -ur a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+--- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -259,7 +259,7 @@
+   assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+-      OrigArg.Ty, CallConv, false);
++      OrigArg.Ty, CallConv, false, DL);
+   for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+     Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+     SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
+diff -ur a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
+--- a/lib/Target/ARM/ARMCallLowering.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/ARM/ARMCallLowering.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -218,7 +218,7 @@
+
+     bool NeedsConsecutiveRegisters =
+         TLI.functionArgumentNeedsConsecutiveRegisters(
+-            SplitTy, F.getCallingConv(), F.isVarArg());
++            SplitTy, F.getCallingConv(), F.isVarArg(), DL);
+     if (NeedsConsecutiveRegisters) {
+       Flags.setInConsecutiveRegs();
+       if (i == e - 1)
+diff -ur a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
+--- a/lib/Target/ARM/ARMISelLowering.cpp	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/ARM/ARMISelLowering.cpp	2022-03-31 15:52:45.000000000 -0400
+@@ -19269,7 +19269,8 @@
+ /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
+ /// passing according to AAPCS rules.
+ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+-    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
++    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
++    const DataLayout &DL) const {
+   if (getEffectiveCallingConv(CallConv, isVarArg) !=
+       CallingConv::ARM_AAPCS_VFP)
+     return false;
+diff -ur a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
+--- a/lib/Target/ARM/ARMISelLowering.h	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/ARM/ARMISelLowering.h	2022-03-31 15:52:45.000000000 -0400
+@@ -578,7 +578,8 @@
+     /// Returns true if an argument of type Ty needs to be passed in a
+     /// contiguous block of registers in calling convention CallConv.
+     bool functionArgumentNeedsConsecutiveRegisters(
+-        Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
++        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
++        const DataLayout &DL) const override;
+
+     /// If a physical register, this returns the register that receives the
+     /// exception address on entry to an EH pad.
+diff -ur a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
+--- a/lib/Target/PowerPC/PPCISelLowering.h	2021-04-06 12:38:18.000000000 -0400
++++ b/lib/Target/PowerPC/PPCISelLowering.h	2022-03-31 15:52:45.000000000 -0400
+@@ -998,7 +998,8 @@
+     /// Returns true if an argument of type Ty needs to be passed in a
+     /// contiguous block of registers in calling convention CallConv.
+     bool functionArgumentNeedsConsecutiveRegisters(
+-      Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
++        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
++        const DataLayout &DL) const override {
+       // We support any array type as "consecutive" block in the parameter
+       // save area.  The element type defines the alignment requirement and
+       // whether the argument should go in GPRs, FPRs, or VRs if available.
diff --git a/conda-recipes/llvm12-lto-static.patch b/conda-recipes/llvm12-lto-static.patch
new file mode 100644
index 000000000..76cc55def
--- /dev/null
+++ b/conda-recipes/llvm12-lto-static.patch
@@ -0,0 +1,12 @@
+diff -ur a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
+--- llvm-12.0.0.src-orig/tools/lto/CMakeLists.txt	2021-04-06 12:38:18.000000000 -0400
++++ llvm-12.0.0.src/tools/lto/CMakeLists.txt	2022-03-31 15:46:00.000000000 -0400
+@@ -21,7 +21,7 @@
+
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports)
+
+-add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS
++add_llvm_library(LTO INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS
+     intrinsics_gen)
+
+ install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h
diff --git a/conda-recipes/llvm13-lto-static.patch b/conda-recipes/llvm13-lto-static.patch
new file mode 100644
index 000000000..b8a624250
--- /dev/null
+++ b/conda-recipes/llvm13-lto-static.patch
@@ -0,0 +1,12 @@
+diff -ur llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt llvm-13.0.0.src/tools/lto/CMakeLists.txt
+--- llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt	2021-09-24 12:18:10.000000000 -0400
++++ llvm-13.0.0.src/tools/lto/CMakeLists.txt	2022-03-31 17:07:07.000000000 -0400
+@@ -25,7 +25,7 @@
+     set(LTO_LIBRARY_TYPE MODULE)
+     set(LTO_LIBRARY_NAME libLTO)
+   else()
+-    set(LTO_LIBRARY_TYPE SHARED)
++    set(LTO_LIBRARY_TYPE STATIC)
+     set(LTO_LIBRARY_NAME LTO)
+ endif()
+
diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch
new file mode 100644
index 000000000..6ef9c9d61
--- /dev/null
+++ b/conda-recipes/llvm14-remove-use-of-clonefile.patch
@@ -0,0 +1,54 @@
+diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc
+--- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc	2022-03-14 05:44:55.000000000 -0400
++++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc	2022-09-19 11:30:59.000000000 -0400
+@@ -1462,6 +1462,7 @@
+ std::error_code copy_file(const Twine &From, const Twine &To) {
+   std::string FromS = From.str();
+   std::string ToS = To.str();
++  /*
+ #if __has_builtin(__builtin_available)
+   if (__builtin_available(macos 10.12, *)) {
+     // Optimistically try to use clonefile() and handle errors, rather than
+@@ -1490,6 +1491,7 @@
+     // cheaper.
+   }
+ #endif
++  */
+   if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA))
+     return std::error_code();
+   return std::error_code(errno, std::generic_category());
+diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp
+--- a/llvm-14.0.6.src/unittests/Support/Path.cpp	2022-03-14 05:44:55.000000000 -0400
++++ b/llvm-14.0.6.src/unittests/Support/Path.cpp	2022-09-19 11:33:07.000000000 -0400
+@@ -2267,15 +2267,15 @@
+
+   EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError);
+   EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe));
+-
++#if !defined(__APPLE__)
+   EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError);
+   EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe));
+-
++#endif
+   // Modern BSDs require root to set the sticky bit on files.
+   // AIX and Solaris without root will mask off (i.e., lose) the sticky bit
+   // on files.
+ #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) &&  \
+-    !defined(_AIX) && !(defined(__sun__) && defined(__svr4__))
++    !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__)
+   EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError);
+   EXPECT_TRUE(CheckPermissions(fs::sticky_bit));
+
+@@ -2297,10 +2297,12 @@
+   EXPECT_TRUE(CheckPermissions(fs::all_perms));
+ #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX
+
++#if !defined(__APPLE__)
+   EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit),
+                                NoError);
+   EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit));
+ #endif
++#endif
+ }
+
+ #ifdef _WIN32
diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch
new file mode 100644
index 000000000..cdce26b34
--- /dev/null
+++ b/conda-recipes/llvm14-svml.patch
@@ -0,0 +1,2192 @@
+From bc2dcd190b7148d04772fa7fcd18b5200b758d4a Mon Sep 17 00:00:00 2001
+From: Ivan Butygin <ivan.butygin@gmail.com>
+Date: Sun, 24 Jul 2022 20:31:29 +0200
+Subject: [PATCH] Fixes vectorizer and extends SVML support
+
+Patch was updated to fix SVML calling convention issues uncovered by llvm 10.
+In previous versions of the patch, the SVML calling convention was selected
+based on compilation settings, so a 256-bit vector function called from avx512
+code would be called with the avx512 cc, which is incorrect. To fix this, the
+SVML cc was split into 3 different ccs for 128, 256 and 512-bit vector lengths,
+which are selected based on the actual input vector length.
+
+Original patch merged several fixes:
+
+1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls
+to SVML library as it has non-standard calling conventions. So accordingly it
+has SVML calling conventions definitions and code to set CC to the vectorized
+calls. As SVML provides several implementations for the math functions we also
+took the fast attribute into consideration and select a faster implementation in
+that case. This work is based on Matt Masten's original work.
+Author: Denis Nagorny
+
+2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML
+calls by breaking down the illegal vector call instruction into multiple legal
+vector call instructions during code generation. Currently the vectorizer does
+not check legality of the generated SVML (or any VECLIB) call instructions, and
+this can lead to potential problems even during vector type legalization. This
+patch addresses this issue by adding a legality check during code generation and
+replaces the illegal SVML call with corresponding legalized instructions.
+(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html)
+Author: Karthik Senthil
+---
+ .../include/llvm/Analysis/TargetLibraryInfo.h |  22 +-
+ llvm/include/llvm/AsmParser/LLToken.h         |   3 +
+ llvm/include/llvm/IR/CMakeLists.txt           |   4 +
+ llvm/include/llvm/IR/CallingConv.h            |   5 +
+ llvm/include/llvm/IR/SVML.td                  |  62 +++
+ llvm/lib/Analysis/CMakeLists.txt              |   1 +
+ llvm/lib/Analysis/TargetLibraryInfo.cpp       |  55 +-
+ llvm/lib/AsmParser/LLLexer.cpp                |   3 +
+ llvm/lib/AsmParser/LLParser.cpp               |   6 +
+ llvm/lib/CodeGen/ReplaceWithVeclib.cpp        |   2 +-
+ llvm/lib/IR/AsmWriter.cpp                     |   3 +
+ llvm/lib/IR/Verifier.cpp                      |   3 +
+ llvm/lib/Target/X86/X86CallingConv.td         |  70 +++
+ llvm/lib/Target/X86/X86ISelLowering.cpp       |   3 +-
+ llvm/lib/Target/X86/X86RegisterInfo.cpp       |  46 ++
+ llvm/lib/Target/X86/X86Subtarget.h            |   3 +
+ .../Transforms/Utils/InjectTLIMappings.cpp    |   2 +-
+ .../Transforms/Vectorize/LoopVectorize.cpp    | 269 +++++++++
+ .../Generic/replace-intrinsics-with-veclib.ll |   4 +-
+ .../LoopVectorize/X86/svml-calls-finite.ll    |  24 +-
+ .../LoopVectorize/X86/svml-calls.ll           | 108 ++--
+ .../LoopVectorize/X86/svml-legal-calls.ll     | 513 ++++++++++++++++++
+ .../LoopVectorize/X86/svml-legal-codegen.ll   |  61 +++
+ llvm/test/Transforms/Util/add-TLI-mappings.ll |  18 +-
+ llvm/utils/TableGen/CMakeLists.txt            |   1 +
+ llvm/utils/TableGen/SVMLEmitter.cpp           | 110 ++++
+ llvm/utils/TableGen/TableGen.cpp              |   8 +-
+ llvm/utils/TableGen/TableGenBackends.h        |   1 +
+ llvm/utils/vim/syntax/llvm.vim                |   1 +
+ 29 files changed, 1341 insertions(+), 70 deletions(-)
+ create mode 100644 llvm/include/llvm/IR/SVML.td
+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
+ create mode 100644 llvm/utils/TableGen/SVMLEmitter.cpp
+
+diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
+index 17d1e3f770c14..110ff08189867 100644
+--- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
++++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
+@@ -39,6 +39,12 @@ struct VecDesc {
+     NotLibFunc
+   };
+
++enum SVMLAccuracy {
++  SVML_DEFAULT,
++  SVML_HA,
++  SVML_EP
++};
++
+ /// Implementation of the target library information.
+ ///
+ /// This class constructs tables that hold the target library information and
+@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl {
+   /// Return true if the function F has a vector equivalent with vectorization
+   /// factor VF.
+   bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
+-    return !getVectorizedFunction(F, VF).empty();
++    return !getVectorizedFunction(F, VF, false).empty();
+   }
+
+   /// Return true if the function F has a vector equivalent with any
+@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl {
+
+   /// Return the name of the equivalent of F, vectorized with factor VF. If no
+   /// such mapping exists, return the empty string.
+-  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
++  std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const;
++
++  Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
++    StringRef F, const FunctionType &FTy, const DataLayout &DL) const;
+
+   /// Set to true iff i32 parameters to library functions should have signext
+   /// or zeroext attributes if they correspond to C-level int or unsigned int,
+@@ -326,8 +335,13 @@ class TargetLibraryInfo {
+   bool isFunctionVectorizable(StringRef F) const {
+     return Impl->isFunctionVectorizable(F);
+   }
+-  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
+-    return Impl->getVectorizedFunction(F, VF);
++  std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const {
++    return Impl->getVectorizedFunction(F, VF, IsFast);
++  }
++
++  Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
++    StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
++    return Impl->getVectorizedFunctionCallingConv(F, FTy, DL);
+   }
+
+   /// Tests if the function is both available and a candidate for optimized code
+diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
+index 78ebb35e0ea4d..3ffb57db8b18b 100644
+--- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
++++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
+@@ -133,6 +133,9 @@ enum Kind {
+   kw_fastcc,
+   kw_coldcc,
+   kw_intel_ocl_bicc,
++  kw_intel_svmlcc128,
++  kw_intel_svmlcc256,
++  kw_intel_svmlcc512,
+   kw_cfguard_checkcc,
+   kw_x86_stdcallcc,
+   kw_x86_fastcallcc,
+diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
+index 0498fc269b634..23bb3de41bc1a 100644
+--- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
++++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
+@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86)
+ tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore)
+ tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve)
+ add_public_tablegen_target(intrinsics_gen)
++
++set(LLVM_TARGET_DEFINITIONS SVML.td)
++tablegen(LLVM SVML.inc -gen-svml)
++add_public_tablegen_target(svml_gen)
+diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
+index fd28542465225..096eea1a8e19b 100644
+--- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
++++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
+@@ -252,6 +252,11 @@ namespace CallingConv {
+     /// M68k_INTR - Calling convention used for M68k interrupt routines.
+     M68k_INTR = 101,
+
++    /// Intel_SVML - Calling conventions for Intel Short Vector Math Library
++    Intel_SVML128 = 102,
++    Intel_SVML256 = 103,
++    Intel_SVML512 = 104,
++
+     /// The highest possible calling convention ID. Must be some 2^k - 1.
+     MaxID = 1023
+   };
+diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td
+new file mode 100644
+index 0000000000000..5af710404c9d9
+--- /dev/null
++++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td
+@@ -0,0 +1,62 @@
++//===-- SVML.td - Defines SVML call variants ---------------*- tablegen -*-===//
++//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file is used by TableGen to define the different types of SVML function
++// variants used with -fveclib=SVML.
++//
++//===----------------------------------------------------------------------===//
++
++class SvmlVariant;
++
++def sin        : SvmlVariant;
++def cos        : SvmlVariant;
++def pow        : SvmlVariant;
++def exp        : SvmlVariant;
++def log        : SvmlVariant;
++def acos       : SvmlVariant;
++def acosh      : SvmlVariant;
++def asin       : SvmlVariant;
++def asinh      : SvmlVariant;
++def atan2      : SvmlVariant;
++def atan       : SvmlVariant;
++def atanh      : SvmlVariant;
++def cbrt       : SvmlVariant;
++def cdfnorm    : SvmlVariant;
++def cdfnorminv : SvmlVariant;
++def cosd       : SvmlVariant;
++def cosh       : SvmlVariant;
++def erf        : SvmlVariant;
++def erfc       : SvmlVariant;
++def erfcinv    : SvmlVariant;
++def erfinv     : SvmlVariant;
++def exp10      : SvmlVariant;
++def exp2       : SvmlVariant;
++def expm1      : SvmlVariant;
++def hypot      : SvmlVariant;
++def invsqrt    : SvmlVariant;
++def log10      : SvmlVariant;
++def log1p      : SvmlVariant;
++def log2       : SvmlVariant;
++def sind       : SvmlVariant;
++def sinh       : SvmlVariant;
++def sqrt       : SvmlVariant;
++def tan        : SvmlVariant;
++def tanh       : SvmlVariant;
++
++// TODO: SVML does not currently provide _ha and _ep variants of these functions.
++// We should call the default variant of these functions in all cases instead.
++
++// def nearbyint  : SvmlVariant;
++// def logb       : SvmlVariant;
++// def floor      : SvmlVariant;
++// def fmod       : SvmlVariant;
++// def ceil       : SvmlVariant;
++// def trunc      : SvmlVariant;
++// def rint       : SvmlVariant;
++// def round      : SvmlVariant;
+diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
+index aec84124129f4..98286e166fbe2 100644
+--- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
++++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
+@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis
+   DEPENDS
+   intrinsics_gen
+   ${MLDeps}
++  svml_gen
+
+   LINK_LIBS
+   ${MLLinkDeps}
+diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
+index 02923c2c7eb14..83abde28a62a4 100644
+--- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
++++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
+@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) {
+                                     F->getFunctionType());
+ }
+
++static std::string svmlMangle(StringRef FnName, const bool IsFast) {
++  std::string FullName = FnName.str();
++  return IsFast ? FullName : FullName + "_ha";
++}
++
+ /// Initialize the set of available library functions based on the specified
+ /// target triple. This should be carefully written so that a missing target
+ /// triple gets a sane set of defaults.
+@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+   }
+   case SVML: {
+     const VecDesc VecFuncs[] = {
+-    #define TLI_DEFINE_SVML_VECFUNCS
+-    #include "llvm/Analysis/VecFuncs.def"
++    #define GET_SVML_VARIANTS
++    #include "llvm/IR/SVML.inc"
++    #undef GET_SVML_VARIANTS
+     };
+     addVectorizableFunctions(VecFuncs);
+     break;
+@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
+   return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+ }
+
+-StringRef
+-TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+-                                             const ElementCount &VF) const {
++std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
++                                                         const ElementCount &VF,
++                                                         bool IsFast) const {
++  bool FromSVML = ClVectorLibrary == SVML;
+   F = sanitizeFunctionName(F);
+   if (F.empty())
+-    return F;
++    return F.str();
+   std::vector<VecDesc>::const_iterator I =
+       llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
+   while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
+-    if (I->VectorizationFactor == VF)
+-      return I->VectorFnName;
++    if (I->VectorizationFactor == VF) {
++      if (FromSVML) {
++        return svmlMangle(I->VectorFnName, IsFast);
++      }
++      return I->VectorFnName.str();
++    }
+     ++I;
+   }
+-  return StringRef();
++  return std::string();
++}
++
++static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType)
++{
++  // The return type is expected to be a vector; its total bit width picks the
++  // matching Intel_SVML calling convention.
++  Type *RetTy = FType.getReturnType();
++  assert(isa<VectorType>(RetTy));
++  auto RetBits = DL.getTypeSizeInBits(cast<VectorType>(RetTy));
++  if (RetBits == 128)
++    return CallingConv::Intel_SVML128;
++  if (RetBits == 256)
++    return CallingConv::Intel_SVML256;
++  if (RetBits == 512)
++    return CallingConv::Intel_SVML512;
++  llvm_unreachable("Invalid vector width");
++  return 0; // not reachable
++}
++
++Optional<CallingConv::ID>
++TargetLibraryInfoImpl::getVectorizedFunctionCallingConv(
++    StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
++  if (F.startswith("__svml")) {
++    return getSVMLCallingConv(DL, FTy);
++  }
++  return {};
+ }
+
+ TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
+index e3bf41c9721b6..4f9dccd4e0724 100644
+--- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
++++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
+@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() {
+   KEYWORD(spir_kernel);
+   KEYWORD(spir_func);
+   KEYWORD(intel_ocl_bicc);
++  KEYWORD(intel_svmlcc128);
++  KEYWORD(intel_svmlcc256);
++  KEYWORD(intel_svmlcc512);
+   KEYWORD(x86_64_sysvcc);
+   KEYWORD(win64cc);
+   KEYWORD(x86_regcallcc);
+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
+index 432ec151cf8ae..3bd6ee61024b8 100644
+--- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
++++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
+@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
+ ///   ::= 'ccc'
+ ///   ::= 'fastcc'
+ ///   ::= 'intel_ocl_bicc'
++///   ::= 'intel_svmlcc128'
++///   ::= 'intel_svmlcc256'
++///   ::= 'intel_svmlcc512'
+ ///   ::= 'coldcc'
+ ///   ::= 'cfguard_checkcc'
+ ///   ::= 'x86_stdcallcc'
+@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
+   case lltok::kw_spir_kernel:    CC = CallingConv::SPIR_KERNEL; break;
+   case lltok::kw_spir_func:      CC = CallingConv::SPIR_FUNC; break;
+   case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
++  case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break;
++  case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break;
++  case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break;
+   case lltok::kw_x86_64_sysvcc:  CC = CallingConv::X86_64_SysV; break;
+   case lltok::kw_win64cc:        CC = CallingConv::Win64; break;
+   case lltok::kw_webkit_jscc:    CC = CallingConv::WebKit_JS; break;
+diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
+index 0ff045fa787e8..175651949ef85 100644
+--- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
++++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
+@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+   // and the exact vector width of the call operands in the
+   // TargetLibraryInfo.
+   const std::string TLIName =
+-      std::string(TLI.getVectorizedFunction(ScalarName, VF));
++      std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
+
+   LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+                     << ScalarName << "` and vector width " << VF << ".\n");
+diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
+index 179754e275b03..c4e95752c97e8 100644
+--- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
++++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
+@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
+   case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
+   case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
+   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
++  case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break;
++  case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break;
++  case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break;
+   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;
+   case CallingConv::ARM_AAPCS:     Out << "arm_aapcscc"; break;
+   case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
+diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp
+index 989d01e2e3950..bae7382a36e13 100644
+--- a/llvm-14.0.6.src/lib/IR/Verifier.cpp
++++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp
+@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) {
+   case CallingConv::Fast:
+   case CallingConv::Cold:
+   case CallingConv::Intel_OCL_BI:
++  case CallingConv::Intel_SVML128:
++  case CallingConv::Intel_SVML256:
++  case CallingConv::Intel_SVML512:
+   case CallingConv::PTX_Kernel:
+   case CallingConv::PTX_Device:
+     Assert(!F.isVarArg(), "Calling convention does not support varargs or "
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
+index 4dd8a6cdd8982..12e65521215e4 100644
+--- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
++++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
+@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[
+   CCDelegateTo<RetCC_X86_64_C>
+ ]>;
+
++// Intel_SVML return-value convention.
++def RetCC_Intel_SVML : CallingConv<[
++  // Vector types are returned in XMM0,XMM1
++  CCIfType<[v4f32, v2f64],
++            CCAssignToReg<[XMM0,XMM1]>>,
++
++  // 256-bit FP vectors
++  CCIfType<[v8f32, v4f64],
++            CCAssignToReg<[YMM0,YMM1]>>,
++
++  // 512-bit FP vectors
++  CCIfType<[v16f32, v8f64],
++            CCAssignToReg<[ZMM0,ZMM1]>>
++]>;
++
+ // This is the return-value convention used for the entire X86 backend.
+ let Entry = 1 in
+ def RetCC_X86 : CallingConv<[
+@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[
+   // Check if this is the Intel OpenCL built-ins calling convention
+   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+
++  CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<RetCC_Intel_SVML>>,
++  CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<RetCC_Intel_SVML>>,
++  CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<RetCC_Intel_SVML>>,
++
+   CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+   CCDelegateTo<RetCC_X86_32>
+ ]>;
+@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[
+   CCDelegateTo<CC_X86_32_C>
+ ]>;
+
++// X86-64 Intel Short Vector Math Library calling convention.
++def CC_Intel_SVML : CallingConv<[
++
++  // The SSE vector arguments are passed in XMM registers.
++  CCIfType<[v4f32, v2f64],
++           CCAssignToReg<[XMM0, XMM1, XMM2]>>,
++
++  // The 256-bit vector arguments are passed in YMM registers.
++  CCIfType<[v8f32, v4f64],
++           CCAssignToReg<[YMM0, YMM1, YMM2]>>,
++
++  // The 512-bit vector arguments are passed in ZMM registers.
++  CCIfType<[v16f32, v8f64],
++           CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>
++]>;
++
++def CC_X86_32_Intr : CallingConv<[
++  CCAssignToStack<4, 4>
++]>;
++
++def CC_X86_64_Intr : CallingConv<[
++  CCAssignToStack<8, 8>
++]>;
++
+ //===----------------------------------------------------------------------===//
+ // X86 Root Argument Calling Conventions
+ //===----------------------------------------------------------------------===//
+@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[
+ let Entry = 1 in
+ def CC_X86 : CallingConv<[
+   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
++  CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<CC_Intel_SVML>>,
++  CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<CC_Intel_SVML>>,
++  CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<CC_Intel_SVML>>,
+   CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+   CCDelegateTo<CC_X86_32>
+ ]>;
+@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
+                                                (sequence "R%u", 12, 15))>;
+ def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+                                                (sequence "XMM%u", 8, 15))>;
++
++// SVML calling convention
++def CSR_32_Intel_SVML        : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>;
++def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML,
++                                                K4, K5, K6, K7)>;
++
++def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>;
++
++def CSR_64_Intel_SVML       : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                               (sequence "XMM%u", 8, 15))>;
++def CSR_Win64_Intel_SVML    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                               (sequence "XMM%u", 6, 15))>;
++
++def CSR_64_Intel_SVML_AVX        : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                                    (sequence "YMM%u", 8, 15))>;
++def CSR_Win64_Intel_SVML_AVX     : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                                    (sequence "YMM%u", 6, 15))>;
++
++def CSR_64_Intel_SVML_AVX512     : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                                    (sequence "ZMM%u", 16, 31),
++                                                    K4, K5, K6, K7)>;
++def CSR_Win64_Intel_SVML_AVX512  : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
++                                                    (sequence "ZMM%u", 6, 21),
++                                                    K4, K5, K6, K7)>;
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
+index 8bb7e81e19bbd..1780ce3fc6467 100644
+--- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
+@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
+   // FIXME: Only some x86_32 calling conventions support AVX512.
+   if (Subtarget.useAVX512Regs() &&
+       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
+-                     CallConv == CallingConv::Intel_OCL_BI)))
++                     CallConv == CallingConv::Intel_OCL_BI   ||
++                     CallConv == CallingConv::Intel_SVML512)))
+     VecVT = MVT::v16f32;
+   else if (Subtarget.hasAVX())
+     VecVT = MVT::v8f32;
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
+index 130cb61cdde24..9eec3b25ca9f2 100644
+--- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
++++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
+@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+   }
+ }
+
++namespace {
++std::pair<const uint32_t *, const MCPhysReg *> getSVMLRegMaskAndSaveList(
++  bool Is64Bit, bool IsWin64, CallingConv::ID CC) { // returns {RegMask, SaveList} for one of the three Intel_SVML conventions
++  assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512);
++  unsigned Abi = CC - CallingConv::Intel_SVML128 ; // table row: 0 - SVML128, 1 - SVML256, 2 - SVML512 (assumes consecutive IDs)
++
++  const std::pair<const uint32_t *, const MCPhysReg *> Abi64[] = { // 64-bit, non-Win64
++    std::make_pair(CSR_64_Intel_SVML_RegMask,        CSR_64_Intel_SVML_SaveList),
++    std::make_pair(CSR_64_Intel_SVML_AVX_RegMask,    CSR_64_Intel_SVML_AVX_SaveList),
++    std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList),
++  };
++
++  const std::pair<const uint32_t *, const MCPhysReg *> AbiWin64[] = { // 64-bit, Win64
++    std::make_pair(CSR_Win64_Intel_SVML_RegMask,        CSR_Win64_Intel_SVML_SaveList),
++    std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask,    CSR_Win64_Intel_SVML_AVX_SaveList),
++    std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList),
++  };
++
++  const std::pair<const uint32_t *, const MCPhysReg *> Abi32[] = { // 32-bit
++    std::make_pair(CSR_32_Intel_SVML_RegMask,        CSR_32_Intel_SVML_SaveList),
++    std::make_pair(CSR_32_Intel_SVML_RegMask,        CSR_32_Intel_SVML_SaveList), // 256-bit row reuses the 128-bit lists
++    std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList),
++  };
++
++  if (Is64Bit) {
++    if (IsWin64) {
++      return AbiWin64[Abi];
++    } else {
++      return Abi64[Abi];
++    }
++  } else {
++    return Abi32[Abi];
++  }
++}
++} // end anonymous namespace
++
+ const MCPhysReg *
+ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+   assert(MF && "MachineFunction required");
+@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+       return CSR_64_Intel_OCL_BI_SaveList;
+     break;
+   }
++  case CallingConv::Intel_SVML128:
++  case CallingConv::Intel_SVML256:
++  case CallingConv::Intel_SVML512: {
++    return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second;
++  }
+   case CallingConv::HHVM:
+     return CSR_64_HHVM_SaveList;
+   case CallingConv::X86_RegCall:
+@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+       return CSR_64_Intel_OCL_BI_RegMask;
+     break;
+   }
++  case CallingConv::Intel_SVML128:
++  case CallingConv::Intel_SVML256:
++  case CallingConv::Intel_SVML512: {
++    return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first;
++  }
+   case CallingConv::HHVM:
+     return CSR_64_HHVM_RegMask;
+   case CallingConv::X86_RegCall:
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
+index 5d773f0c57dfb..6bdf5bc6f3fe9 100644
+--- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
++++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
+@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
+     case CallingConv::X86_ThisCall:
+     case CallingConv::X86_VectorCall:
+     case CallingConv::Intel_OCL_BI:
++    case CallingConv::Intel_SVML128:
++    case CallingConv::Intel_SVML256:
++    case CallingConv::Intel_SVML512:
+       return isTargetWin64();
+     // This convention allows using the Win64 convention on other targets.
+     case CallingConv::Win64:
+diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
+index 047bf5569ded3..59897785f156c 100644
+--- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
++++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
+@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
+
+   auto AddVariantDecl = [&](const ElementCount &VF) {
+     const std::string TLIName =
+-        std::string(TLI.getVectorizedFunction(ScalarName, VF));
++        std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
+     if (!TLIName.empty()) {
+       std::string MangledName =
+           VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
+diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
+index 46ff0994e04e7..f472af5e1a835 100644
+--- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
++++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
+@@ -712,6 +712,27 @@ class InnerLoopVectorizer {
+   virtual void printDebugTracesAtStart(){};
+   virtual void printDebugTracesAtEnd(){};
+
++  /// Check legality of given SVML call instruction \p VecCall generated for
++  /// scalar call \p Call. If illegal then the appropriate legal instruction
++  /// is returned.
++  Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call);
++
++  /// Returns the legal VF for a call instruction \p CI using TTI information
++  /// and vector type.
++  ElementCount getLegalVFForCall(CallInst *CI);
++
++  /// Partially vectorize a given call \p Call by breaking it down into multiple
++  /// calls of \p LegalCall, decided by the variant VF \p LegalVF.
++  Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall,
++                              unsigned LegalVF);
++
++  /// Generate shufflevector instruction for a vector value \p V based on the
++  /// current \p Part and a smaller VF \p LegalVF.
++  Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part);
++
++  /// Combine partially vectorized calls stored in \p CallResults.
++  Value *combinePartialVecCalls(SmallVectorImpl<Value *> &CallResults);
++
+   /// The original loop.
+   Loop *OrigLoop;
+
+@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) {
+   return !CInt || CInt->isZero();
+ }
+
++static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL,
++                                         const TargetLibraryInfo &TLI) {
++  // Ask TLI whether the callee (looked up by name and signature) requires a
++  // dedicated calling convention. When the lookup yields no value the call
++  // site is left with whatever convention it already has.
++  Function *Callee = CI.getCalledFunction();
++  if (auto VecCC = TLI.getVectorizedFunctionCallingConv(
++          Callee->getName(), *Callee->getFunctionType(), DL))
++    CI.setCallingConv(*VecCC);
++}
++
+ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
+                                                VPUser &ArgOperands,
+                                                VPTransformState &State) {
+@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
+       if (isa<FPMathOperator>(V))
+         V->copyFastMathFlags(CI);
+
++    const DataLayout &DL = V->getModule()->getDataLayout();
++    setVectorFunctionCallingConv(*V, DL, *TLI);
++
++    // Perform legalization of SVML call instruction only if original call
++    // was not Intrinsic
++    if (!UseVectorIntrinsic &&
++        (V->getCalledFunction()->getName()).startswith("__svml")) {
++      // assert((V->getCalledFunction()->getName()).startswith("__svml"));
++      LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump());
++      auto *LegalV = cast<Instruction>(legalizeSVMLCall(V, CI));
++      LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: ";
++                 LegalV->dump());
++      State.set(Def, LegalV, Part);
++      addMetadata(LegalV, &I);
++    } else {
+       State.set(Def, V, Part);
+       addMetadata(V, &I);
++    }
++  }
++}
++
++//===----------------------------------------------------------------------===//
++// Implementation of functions for SVML vector call legalization.
++//===----------------------------------------------------------------------===//
++//
++// Unlike other VECLIBs, SVML needs to be used with target-legal
++// vector types. Otherwise, link failures and/or runtime failures
++// will occur. A motivating example could be -
++//
++//   double *a;
++//   float *b;
++//   #pragma clang loop vectorize_width(8)
++//   for(i = 0; i < N; ++i) {
++//     a[i] = sin(i);   // Legal SVML VF must be 4 or below on AVX
++//     b[i] = cosf(i);  // VF can be 8 on AVX since 8 floats can fit in YMM
++//    }
++//
++// Current implementation of vector code generation in LV is
++// driven based on a single VF (in InnerLoopVectorizer::VF). This
++// inhibits the flexibility of adjusting/choosing different VF
++// for different instructions.
++//
++// Due to this limitation it is much more straightforward to
++// first generate the illegal sin8 (svml_sin8 for SVML vector
++// library) call and then legalize it than trying to avoid
++// generating illegal code from the beginning.
++//
++// A solution for this problem is to check legality of the
++// call instruction right after generating it in vectorizer and
++// if it is illegal we split the call arguments and issue multiple
++// calls to match the legal VF. This is demonstrated currently for
++// the SVML vector library calls (non-intrinsic version only).
++//
++// Future directions and extensions:
++// 1) This legalization example shows us that a good direction
++//    for the VPlan framework would be to model the vector call
++//    instructions in a way that legal VF for each call is chosen
++//    correctly within vectorizer and illegal code generation is
++//    avoided.
++// 2) This logic can also be extended to general vector functions
++//    i.e. legalization of OpenMP declare simd functions. The
++//    requirements needed for this will be documented soon.
++
++Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall,
++                                             CallInst *Call) {
++  ElementCount LegalVF = getLegalVFForCall(VecCall);
++
++  assert(LegalVF.getKnownMinValue() > 1 &&
++         "Legal VF for SVML call must be greater than 1 to vectorize");
++
++  if (LegalVF == VF)
++    return VecCall;
++  else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue())
++    // TODO: handle case when we are underfilling vectors
++    return VecCall;
++
++  // Legal VF for this SVML call is smaller than chosen VF, break it down into
++  // smaller call instructions
++
++  // Build undef placeholder operands of the legal-VF vector types. They only
++  // give the template call below a valid shape; partialVectorizeCall replaces
++  // the operands on each clone it makes.
++  SmallVector<Value *, 4> NewArgs;
++
++  for (Value *ArgOperand : Call->args()) {
++    Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF);
++    NewArgs.push_back(UndefValue::get(Ty));
+   }
++
++  // Construct legal vector function
++  const VFShape Shape =
++    VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/);
++  Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape);
++  assert(LegalVectorF != nullptr && "Can't create legal vector function.");
++
++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump());
++
++  SmallVector<OperandBundleDef, 1> OpBundles;
++  Call->getOperandBundlesAsDefs(OpBundles);
++  auto LegalV = std::unique_ptr<CallInst>(CallInst::Create(LegalVectorF, NewArgs, OpBundles));
++
++  if (isa<FPMathOperator>(LegalV))
++    LegalV->copyFastMathFlags(Call);
++
++  const DataLayout &DL = VecCall->getModule()->getDataLayout();
++  // Set SVML calling conventions
++  setVectorFunctionCallingConv(*LegalV, DL, *TLI);
++
++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump());
++
++  Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue());
++
++  LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump());
++
++  // Remove the illegal call from Builder
++  VecCall->eraseFromParent();
++
++  return LegalizedCall;
++}
++
++ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) {
++  const DataLayout &DL = CI->getModule()->getDataLayout();
++  FunctionType *CallFT = CI->getFunctionType();
++  // All functions that need legalization should have a vector return type.
++  // This is true for all SVML functions that are currently supported.
++  assert(isa<VectorType>(CallFT->getReturnType()) &&
++         "Return type of call that needs legalization is not a vector.");
++  auto *VecCallRetType = cast<VectorType>(CallFT->getReturnType());
++  Type *ElemType = VecCallRetType->getElementType();
++
++  // Legal VF = how many return-type elements fit in one fixed-width vector register.
++  unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType);
++  unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
++  unsigned LegalVF = VectorBitWidth / TypeBitWidth;
++
++  LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n");
++  LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n");
++  LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth << "\n");
++  LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n");
++
++  return ElementCount::getFixed(LegalVF);
++}
++
++// Partial vectorization of a call instruction is achieved by making clones of
++// \p LegalCall and overwriting its argument operands with shufflevector
++// equivalent decided based on \p LegalVF and current Part being filled.
++Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call,
++                                                 CallInst *LegalCall,
++                                                 unsigned LegalVF) {
++  const unsigned NumParts = VF.getKnownMinValue() / LegalVF;
++  LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n");
++  SmallVector<Value *, 8> CallResults;
++
++  const unsigned NumArgs = Call->arg_size();
++  for (unsigned Part = 0; Part != NumParts; ++Part) {
++    CallInst *Narrow = cast<CallInst>(LegalCall->clone());
++
++    // Feed the clone the Part-th slice of every operand of the wide call.
++    for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
++      Narrow->setArgOperand(
++          ArgNo, generateShuffleValue(Call->getArgOperand(ArgNo), LegalVF, Part));
++
++    LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; Narrow->dump());
++
++    // Insert the clone through the builder and remember its result.
++    CallResults.push_back(Builder.Insert(Narrow));
++  }
++
++  return combinePartialVecCalls(CallResults);
++}
++
++Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF,
++                                                 unsigned Part) {
++  // Example:
++  // Consider the following vector code -
++  // %1 = sitofp <4 x i32> %0 to <4 x double>
++  // %2 = call <4 x double> @__svml_sin4(<4 x double> %1)
++  //
++  // If the LegalVF is 2, we partially vectorize the sin4 call by invoking
++  // generateShuffleValue on the operand %1
++  // If Part = 0 (parts are zero-based), output value is -
++  // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32><i32 0, i32 1>
++  // and if Part = 1, output is -
++  // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32><i32 2, i32 3>
++
++  assert(isa<VectorType>(V->getType()) &&
++         "Cannot generate shuffles for non-vector values.");
++  SmallVector<int, 4> ShuffleMask;
++  Value *Undef = UndefValue::get(V->getType());
++
++  unsigned ElemIdx = Part * LegalVF;
++
++  for (unsigned K = 0; K < LegalVF; K++)
++    ShuffleMask.push_back(static_cast<int>(ElemIdx + K));
++
++  auto *ShuffleInst =
++      Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle");
++
++  return ShuffleInst;
++}
++
++// Results of the calls executed by smaller legal call instructions must be
++// combined to match the original VF for later use. This is done by constructing
++// shufflevector instructions in a cumulative fashion.
++Value *InnerLoopVectorizer::combinePartialVecCalls(
++    SmallVectorImpl<Value *> &CallResults) {
++  assert(isa<VectorType>(CallResults[0]->getType()) &&
++         "Cannot combine calls with non-vector results.");
++  auto *CallType = cast<VectorType>(CallResults[0]->getType());
++
++  Value *CombinedShuffle = nullptr; // stays null only if no pair gets combined
++  unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2;
++  unsigned NumRegs = CallResults.size();
++
++  assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) &&
++         "Number of partial vector calls to combine must be a power of 2 "
++         "(atleast 2^1)");
++
++  while (NumRegs > 1) {
++    for (unsigned I = 0; I < NumRegs; I += 2) {
++      SmallVector<int, 4> ShuffleMask;
++      for (unsigned J = 0; J < NumElems; J++)
++        ShuffleMask.push_back(static_cast<int>(J));
++
++      CombinedShuffle = Builder.CreateShuffleVector(
++          CallResults[I], CallResults[I + 1], ShuffleMask, "combined");
++      LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:";
++                 CombinedShuffle->dump());
++      CallResults.push_back(CombinedShuffle);
++    }
++
++    // The first NumRegs entries were this round's inputs; drop them so that
++    // only the shuffles appended above remain as the next round's inputs.
++    CallResults.erase(CallResults.begin(), CallResults.begin() + NumRegs);
++
++    NumElems *= 2;
++    NumRegs /= 2;
++  }
++
++  return CombinedShuffle;
+ }
+
+ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
+diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
+index df8b7c498bd00..63a36549f18fd 100644
+--- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
++++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
+@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
+ define <4 x double> @exp_v4(<4 x double> %in) {
+ ; SVML-LABEL: define {{[^@]+}}@exp_v4
+ ; SVML-SAME: (<4 x double> [[IN:%.*]]) {
+-; SVML-NEXT:    [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
++; SVML-NEXT:    [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]])
+ ; SVML-NEXT:    ret <4 x double> [[TMP1]]
+ ;
+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
+@@ -37,7 +37,7 @@ declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
+ define <4 x float> @exp_f32(<4 x float> %in) {
+ ; SVML-LABEL: define {{[^@]+}}@exp_f32
+ ; SVML-SAME: (<4 x float> [[IN:%.*]]) {
+-; SVML-NEXT:    [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
++; SVML-NEXT:    [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]])
+ ; SVML-NEXT:    ret <4 x float> [[TMP1]]
+ ;
+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+index a6e191c3d6923..d6e2e11106949 100644
+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+@@ -39,7 +39,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__exp_finite(double) #0
+
+ ; CHECK-LABEL: @exp_f64
+-; CHECK: <4 x double> @__svml_exp4
++; CHECK: <2 x double> @__svml_exp2
++; CHECK: <2 x double> @__svml_exp2
+ ; CHECK: ret
+ define void @exp_f64(double* nocapture %varray) {
+ entry:
+@@ -99,7 +100,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__log_finite(double) #0
+
+ ; CHECK-LABEL: @log_f64
+-; CHECK: <4 x double> @__svml_log4
++; CHECK: <2 x double> @__svml_log2
++; CHECK: <2 x double> @__svml_log2
+ ; CHECK: ret
+ define void @log_f64(double* nocapture %varray) {
+ entry:
+@@ -159,7 +161,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__pow_finite(double, double) #0
+
+ ; CHECK-LABEL: @pow_f64
+-; CHECK: <4 x double> @__svml_pow4
++; CHECK: <2 x double> @__svml_pow2
++; CHECK: <2 x double> @__svml_pow2
+ ; CHECK: ret
+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+ entry:
+@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0
+
+ define void @exp2f_finite(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp2f_finite(
+-; CHECK:    call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}})
++; CHECK:    call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}})
++; CHECK:    call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}})
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0
+
+ define void @exp2_finite(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_finite(
+-; CHECK:    call <4 x double> @__svml_exp24(<4 x double> {{.*}})
++; CHECK:    call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}})
++; CHECK:    call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}})
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -276,7 +281,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__log2_finite(double) #0
+
+ ; CHECK-LABEL: @log2_f64
+-; CHECK: <4 x double> @__svml_log24
++; CHECK: <2 x double> @__svml_log22
++; CHECK: <2 x double> @__svml_log22
+ ; CHECK: ret
+ define void @log2_f64(double* nocapture %varray) {
+ entry:
+@@ -333,7 +339,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__log10_finite(double) #0
+
+ ; CHECK-LABEL: @log10_f64
+-; CHECK: <4 x double> @__svml_log104
++; CHECK: <2 x double> @__svml_log102
++; CHECK: <2 x double> @__svml_log102
+ ; CHECK: ret
+ define void @log10_f64(double* nocapture %varray) {
+ entry:
+@@ -390,7 +397,8 @@ for.end:                                          ; preds = %for.body
+ declare double @__sqrt_finite(double) #0
+
+ ; CHECK-LABEL: @sqrt_f64
+-; CHECK: <4 x double> @__svml_sqrt4
++; CHECK: <2 x double> @__svml_sqrt2
++; CHECK: <2 x double> @__svml_sqrt2
+ ; CHECK: ret
+ define void @sqrt_f64(double* nocapture %varray) {
+ entry:
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
+index 42c280df6ad02..088bbdcf1aa4a 100644
+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
+@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0
+
+ define void @sin_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -71,7 +71,7 @@ for.end:
+
+ define void @sin_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -94,7 +94,7 @@ for.end:
+
+ define void @sin_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -117,7 +117,7 @@ for.end:
+
+ define void @sin_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -140,7 +140,7 @@ for.end:
+
+ define void @cos_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -163,7 +163,7 @@ for.end:
+
+ define void @cos_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -186,7 +186,7 @@ for.end:
+
+ define void @cos_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -209,7 +209,7 @@ for.end:
+
+ define void @cos_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -232,7 +232,7 @@ for.end:
+
+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+ ; CHECK-LABEL: @pow_f64(
+-; CHECK:    [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
++; CHECK:    [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -257,7 +257,7 @@ for.end:
+
+ define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
+ ; CHECK-LABEL: @pow_f64_intrinsic(
+-; CHECK:    [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
++; CHECK:    [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -282,7 +282,7 @@ for.end:
+
+ define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+ ; CHECK-LABEL: @pow_f32(
+-; CHECK:    [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
++; CHECK:    [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -307,7 +307,7 @@ for.end:
+
+ define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
+ ; CHECK-LABEL: @pow_f32_intrinsic(
+-; CHECK:    [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
++; CHECK:    [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -332,7 +332,7 @@ for.end:
+
+ define void @exp_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -355,7 +355,7 @@ for.end:
+
+ define void @exp_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -378,7 +378,7 @@ for.end:
+
+ define void @exp_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -401,7 +401,7 @@ for.end:
+
+ define void @exp_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -424,7 +424,7 @@ for.end:
+
+ define void @log_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -447,7 +447,7 @@ for.end:
+
+ define void @log_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -470,7 +470,7 @@ for.end:
+
+ define void @log_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @log_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -493,7 +493,7 @@ for.end:
+
+ define void @log_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @log_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -516,7 +516,7 @@ for.end:
+
+ define void @log2_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -539,7 +539,7 @@ for.end:
+
+ define void @log2_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -562,7 +562,7 @@ for.end:
+
+ define void @log2_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -585,7 +585,7 @@ for.end:
+
+ define void @log2_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -608,7 +608,7 @@ for.end:
+
+ define void @log10_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -631,7 +631,7 @@ for.end:
+
+ define void @log10_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -654,7 +654,7 @@ for.end:
+
+ define void @log10_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -677,7 +677,7 @@ for.end:
+
+ define void @log10_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -700,7 +700,7 @@ for.end:
+
+ define void @sqrt_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @sqrt_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -723,7 +723,7 @@ for.end:
+
+ define void @sqrt_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @sqrt_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -746,7 +746,7 @@ for.end:
+
+ define void @exp2_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f64(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -769,7 +769,7 @@ for.end:
+
+ define void @exp2_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f32(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -792,7 +792,7 @@ for.end:
+
+ define void @exp2_f64_intrinsic(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f64_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -815,7 +815,7 @@ for.end:
+
+ define void @exp2_f32_intrinsic(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f32_intrinsic(
+-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
++; CHECK:    [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]])
+ ; CHECK:    ret void
+ ;
+ entry:
+@@ -836,4 +836,44 @@ for.end:
+   ret void
+ }
+
++; CHECK-LABEL: @atan2_finite
++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24(
++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24(
++; CHECK: ret
++
++declare double @__atan2_finite(double, double) local_unnamed_addr #0
++
++define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 {
++entry:
++  br label %for.cond1.preheader
++
++for.cond1.preheader:                              ; preds = %for.inc7, %entry
++  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ]
++  %0 = trunc i64 %indvars.iv19 to i32
++  %conv = sitofp i32 %0 to double
++  br label %for.body3
++
++for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
++  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
++  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
++  %1 = trunc i64 %indvars.iv.next to i32
++  %conv4 = sitofp i32 %1 to double
++  %call = tail call fast double @__atan2_finite(double %conv, double %conv4)
++  %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv
++  store double %call, double* %arrayidx6, align 8
++  %exitcond = icmp eq i64 %indvars.iv.next, 100
++  br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5
++
++for.inc7:                                         ; preds = %for.body3
++  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
++  %exitcond21 = icmp eq i64 %indvars.iv.next20, 100
++  br i1 %exitcond21, label %for.end9, label %for.cond1.preheader
++
++for.end9:                                         ; preds = %for.inc7
++  ret void
++}
++
+ attributes #0 = { nounwind readnone }
++!5 = distinct !{!5, !6, !7}
++!6 = !{!"llvm.loop.vectorize.width", i32 8}
++!7 = !{!"llvm.loop.vectorize.enable", i1 true}
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
+new file mode 100644
+index 0000000000000..326c763994343
+--- /dev/null
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
+@@ -0,0 +1,513 @@
++; Check legalization of SVML calls, including intrinsic versions (like @llvm.<fn_name>.<type>).
++
++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++declare double @sin(double) #0
++declare float @sinf(float) #0
++declare double @llvm.sin.f64(double) #0
++declare float @llvm.sin.f32(float) #0
++
++declare double @cos(double) #0
++declare float @cosf(float) #0
++declare double @llvm.cos.f64(double) #0
++declare float @llvm.cos.f32(float) #0
++
++declare double @pow(double, double) #0
++declare float @powf(float, float) #0
++declare double @llvm.pow.f64(double, double) #0
++declare float @llvm.pow.f32(float, float) #0
++
++declare double @exp(double) #0
++declare float @expf(float) #0
++declare double @llvm.exp.f64(double) #0
++declare float @llvm.exp.f32(float) #0
++
++declare double @log(double) #0
++declare float @logf(float) #0
++declare double @llvm.log.f64(double) #0
++declare float @llvm.log.f32(float) #0
++
++
++define void @sin_f64(double* nocapture %varray) {
++; CHECK-LABEL: @sin_f64(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @sin(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @sin_f32(float* nocapture %varray) {
++; CHECK-LABEL: @sin_f32(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @sinf(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @sin_f64_intrinsic(double* nocapture %varray) {
++; CHECK-LABEL: @sin_f64_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @llvm.sin.f64(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @sin_f32_intrinsic(float* nocapture %varray) {
++; CHECK-LABEL: @sin_f32_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @llvm.sin.f32(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @cos_f64(double* nocapture %varray) {
++; CHECK-LABEL: @cos_f64(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @cos(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @cos_f32(float* nocapture %varray) {
++; CHECK-LABEL: @cos_f32(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @cosf(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @cos_f64_intrinsic(double* nocapture %varray) {
++; CHECK-LABEL: @cos_f64_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @llvm.cos.f64(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @cos_f32_intrinsic(float* nocapture %varray) {
++; CHECK-LABEL: @cos_f32_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @llvm.cos.f32(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
++; CHECK-LABEL: @pow_f64(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]])
++; CHECK:    [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
++  %tmp1 = load double, double* %arrayidx, align 4
++  %tmp2 = tail call double @pow(double %conv, double %tmp1)
++  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %tmp2, double* %arrayidx2, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
++; CHECK-LABEL: @pow_f64_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]])
++; CHECK:    [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
++  %tmp1 = load double, double* %arrayidx, align 4
++  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
++  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %tmp2, double* %arrayidx2, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
++; CHECK-LABEL: @pow_f32(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
++  %tmp1 = load float, float* %arrayidx, align 4
++  %tmp2 = tail call float @powf(float %conv, float %tmp1)
++  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %tmp2, float* %arrayidx2, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
++; CHECK-LABEL: @pow_f32_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
++  %tmp1 = load float, float* %arrayidx, align 4
++  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
++  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %tmp2, float* %arrayidx2, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @exp_f64(double* nocapture %varray) {
++; CHECK-LABEL: @exp_f64(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @exp(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @exp_f32(float* nocapture %varray) {
++; CHECK-LABEL: @exp_f32(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @expf(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @exp_f64_intrinsic(double* nocapture %varray) {
++; CHECK-LABEL: @exp_f64_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @llvm.exp.f64(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @exp_f32_intrinsic(float* nocapture %varray) {
++; CHECK-LABEL: @exp_f32_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @llvm.exp.f32(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @log_f64(double* nocapture %varray) {
++; CHECK-LABEL: @log_f64(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @log(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @log_f32(float* nocapture %varray) {
++; CHECK-LABEL: @log_f32(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @logf(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @log_f64_intrinsic(double* nocapture %varray) {
++; CHECK-LABEL: @log_f64_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]])
++; CHECK:    [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to double
++  %call = tail call double @llvm.log.f64(double %conv)
++  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
++  store double %call, double* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++define void @log_f32_intrinsic(float* nocapture %varray) {
++; CHECK-LABEL: @log_f32_intrinsic(
++; CHECK:    [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]])
++; CHECK:    ret void
++;
++entry:
++  br label %for.body
++
++for.body:
++  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
++  %tmp = trunc i64 %iv to i32
++  %conv = sitofp i32 %tmp to float
++  %call = tail call float @llvm.log.f32(float %conv)
++  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
++  store float %call, float* %arrayidx, align 4
++  %iv.next = add nuw nsw i64 %iv, 1
++  %exitcond = icmp eq i64 %iv.next, 1000
++  br i1 %exitcond, label %for.end, label %for.body
++
++for.end:
++  ret void
++}
++
++attributes #0 = { nounwind readnone }
++
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
+new file mode 100644
+index 0000000000000..9422653445dc2
+--- /dev/null
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
+@@ -0,0 +1,61 @@
++; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype.
++; The C code used to generate this test:
++
++; #include <math.h>
++;
++; void foo(double *a, int N){
++;   int i;
++; #pragma clang loop vectorize_width(8)
++;   for (i=0;i<N;i++){
++;     a[i] = sin(i);
++;   }
++; }
++
++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -mattr=avx -S < %s | FileCheck %s
++
++; CHECK: [[I1:%.*]] = sitofp <8 x i32> [[I0:%.*]] to <8 x double>
++; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
++; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]])
++; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
++; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]])
++; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
++; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8
++
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++; Function Attrs: nounwind uwtable
++define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 {
++entry:
++  %cmp5 = icmp sgt i32 %N, 0
++  br i1 %cmp5, label %for.body.preheader, label %for.end
++
++for.body.preheader:                               ; preds = %entry
++  %wide.trip.count = zext i32 %N to i64
++  br label %for.body
++
++for.body:                                         ; preds = %for.body, %for.body.preheader
++  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
++  %0 = trunc i64 %indvars.iv to i32
++  %conv = sitofp i32 %0 to double
++  %call = tail call fast double @sin(double %conv) #2
++  %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv
++  store double %call, double* %arrayidx, align 8, !tbaa !2
++  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
++  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
++  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
++
++for.end:                                          ; preds = %for.body, %entry
++  ret void
++}
++
++; Function Attrs: nounwind
++declare dso_local double @sin(double) local_unnamed_addr #1
++
++!2 = !{!3, !3, i64 0}
++!3 = !{!"double", !4, i64 0}
++!4 = !{!"omnipotent char", !5, i64 0}
++!5 = !{!"Simple C/C++ TBAA"}
++!6 = distinct !{!6, !7}
++!7 = !{!"llvm.loop.vectorize.width", i32 8}
+diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
+index e8c83c4d9bd1f..615fdc29176a2 100644
+--- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
++++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
+@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu"
+
+ ; COMMON-LABEL: @llvm.compiler.used = appending global
+ ; SVML-SAME:        [6 x i8*] [
+-; SVML-SAME:          i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*),
+-; SVML-SAME:          i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*),
+-; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*),
+-; SVML-SAME:          i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*),
+-; SVML-SAME:          i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
+-; SVML-SAME:          i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
++; SVML-SAME:          i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*),
++; SVML-SAME:          i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*),
++; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8_ha to i8*),
++; SVML-SAME:          i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*),
++; SVML-SAME:          i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*),
++; SVML-SAME:          i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*)
+ ; MASSV-SAME:       [2 x i8*] [
+ ; MASSV-SAME:         i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*),
+ ; MASSV-SAME:         i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*)
+@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0
+ attributes #0 = { nounwind readnone }
+
+ ; SVML:      attributes #[[SIN]] = { "vector-function-abi-variant"=
+-; SVML-SAME:   "_ZGV_LLVM_N2v_sin(__svml_sin2),
+-; SVML-SAME:   _ZGV_LLVM_N4v_sin(__svml_sin4),
+-; SVML-SAME:   _ZGV_LLVM_N8v_sin(__svml_sin8)" }
++; SVML-SAME:   "_ZGV_LLVM_N2v_sin(__svml_sin2_ha),
++; SVML-SAME:   _ZGV_LLVM_N4v_sin(__svml_sin4_ha),
++; SVML-SAME:   _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" }
+
+ ; MASSV:      attributes #[[SIN]] = { "vector-function-abi-variant"=
+ ; MASSV-SAME:   "_ZGV_LLVM_N2v_sin(__sind2)" }
+diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
+index 97df6a55d1b59..199e0285c9e5d 100644
+--- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
++++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
+@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM
+   SearchableTableEmitter.cpp
+   SubtargetEmitter.cpp
+   SubtargetFeatureInfo.cpp
++  SVMLEmitter.cpp
+   TableGen.cpp
+   Types.cpp
+   X86DisassemblerTables.cpp
+diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
+new file mode 100644
+index 0000000000000..a5aeea48db28b
+--- /dev/null
++++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
+@@ -0,0 +1,110 @@
++//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===//
++//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This tablegen backend emits the scalar to svml function map for TLI.
++//
++//===----------------------------------------------------------------------===//
++
++#include "CodeGenTarget.h"
++#include "llvm/Support/Format.h"
++#include "llvm/TableGen/Error.h"
++#include "llvm/TableGen/Record.h"
++#include "llvm/TableGen/TableGenBackend.h"
++#include <map>
++#include <vector>
++
++using namespace llvm;
++
++#define DEBUG_TYPE "SVMLVariants"
++#include "llvm/Support/Debug.h"
++
++namespace {
++
++class SVMLVariantsEmitter {
++
++  RecordKeeper &Records;
++
++private:
++  void emitSVMLVariants(raw_ostream &OS);
++
++public:
++  SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {}
++
++  void run(raw_ostream &OS);
++};
++} // End anonymous namespace
++
++/// \brief Emit the set of SVML variant function names.
++// The default is to emit the high accuracy SVML variants until a mechanism is
++// introduced to allow a selection of different variants through precision
++// requirements specified by the user. This code generates mappings to svml
++// that are in the scalar form of llvm intrinsics, math library calls, or the
++// finite variants of math library calls.
++void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) {
++
++  const unsigned MinSinglePrecVL = 4;
++  const unsigned MaxSinglePrecVL = 16;
++  const unsigned MinDoublePrecVL = 2;
++  const unsigned MaxDoublePrecVL = 8;
++
++  OS << "#ifdef GET_SVML_VARIANTS\n";
++
++  for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) {
++    StringRef SvmlVariantNameStr = D->getName();
++    // Single Precision SVML
++    for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) {
++      // Emit the scalar math library function to svml function entry.
++      OS << "{\"" << SvmlVariantNameStr << "f" << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
++         << "ElementCount::getFixed(" << VL << ")},\n";
++
++      // Emit the scalar intrinsic to svml function entry.
++      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
++         << "ElementCount::getFixed(" << VL << ")},\n";
++
++      // Emit the finite math library function to svml function entry.
++      OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
++         << "ElementCount::getFixed(" << VL << ")},\n";
++    }
++
++    // Double Precision SVML
++    for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) {
++      // Emit the scalar math library function to svml function entry.
++      OS << "{\"" << SvmlVariantNameStr << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
++         << ")},\n";
++
++      // Emit the scalar intrinsic to svml function entry.
++      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
++         << ")},\n";
++
++      // Emit the finite math library function to svml function entry.
++      OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", ";
++      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", "
++         << "ElementCount::getFixed(" << VL << ")},\n";
++    }
++  }
++
++  OS << "#endif // GET_SVML_VARIANTS\n\n";
++}
++
++void SVMLVariantsEmitter::run(raw_ostream &OS) {
++  emitSVMLVariants(OS);
++}
++
++namespace llvm {
++
++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) {
++  SVMLVariantsEmitter(RK).run(OS);
++}
++
++} // End llvm namespace
+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
+index 2d4a45f889be6..603d0c223b33a 100644
+--- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
++++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
+@@ -57,6 +57,7 @@ enum ActionType {
+   GenAutomata,
+   GenDirectivesEnumDecl,
+   GenDirectivesEnumImpl,
++  GenSVMLVariants,
+ };
+
+ namespace llvm {
+@@ -138,7 +139,9 @@ cl::opt<ActionType> Action(
+         clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl",
+                    "Generate directive related declaration code (header file)"),
+         clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl",
+-                   "Generate directive related implementation code")));
++                   "Generate directive related implementation code"),
++        clEnumValN(GenSVMLVariants, "gen-svml",
++                   "Generate SVML variant function names")));
+
+ cl::OptionCategory PrintEnumsCat("Options for -print-enums");
+ cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
+@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
+   case GenDirectivesEnumImpl:
+     EmitDirectivesImpl(Records, OS);
+     break;
++  case GenSVMLVariants:
++    EmitSVMLVariants(Records, OS);
++    break;
+   }
+
+   return false;
+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
+index 71db8dc77b052..86c3a3068c2dc 100644
+--- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
++++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
+@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
+ void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
+ void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS);
+ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS);
+
+ } // End llvm namespace
+
+diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
+index 205db16b7d8cd..2572ab5a59e1b 100644
+--- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
++++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
+@@ -104,6 +104,7 @@ syn keyword llvmKeyword
+       \ inreg
+       \ intel_ocl_bicc
+       \ inteldialect
++      \ intel_svmlcc
+       \ internal
+       \ jumptable
+       \ linkonce
diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat
index 1ce228c80..e48800dc5 100644
--- a/conda-recipes/llvmdev/bld.bat
+++ b/conda-recipes/llvmdev/bld.bat
@@ -1,3 +1,13 @@
+setlocal EnableDelayedExpansion
+FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d
+if !errorlevel! neq 0 exit /b !errorlevel!)
+FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d
+if !errorlevel! neq 0 exit /b !errorlevel!)
+FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d
+if !errorlevel! neq 0 exit /b !errorlevel!)
+
+DIR
+
 mkdir build
 cd build

@@ -24,31 +34,18 @@ REM the 64bit linker anyway. This must be passed in to certain generators as
 REM '-Thost x64'.
 set PreferredToolArchitecture=x64

-set MAX_INDEX_CMAKE_GENERATOR=2
-
-REM On older generators we can squeete the architecture into the generator
-REM name. In newer generators, we must use the -A flag for cmake to hand in the
-REM correct architecture. Also, using Visual Studio 16 2019 we use toolset
-REM v141, which basically means use a Visual Studio 15 2017 type compiler from
-REM Visual Studio 16 2019. See also:
-REM https://stackoverflow.com/questions/55708600/whats-the-cmake-generator-for-visual-studio-2019
+set MAX_INDEX_CMAKE_GENERATOR=0

-set "CMAKE_GENERATOR[0]=Visual Studio 14 2015%ARCH_POSTFIX%"
-set "CMAKE_GENERATOR[1]=Visual Studio 15 2017%ARCH_POSTFIX%"
-set "CMAKE_GENERATOR[2]=Visual Studio 16 2019"
+set "CMAKE_GENERATOR[0]=Visual Studio 16 2019"

-set "CMAKE_GENERATOR_ARCHITECTURE[0]="
-set "CMAKE_GENERATOR_ARCHITECTURE[1]="
-set "CMAKE_GENERATOR_ARCHITECTURE[2]=%GEN_ARCH%"
+set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%"

-set "CMAKE_GENERATOR_TOOLSET[0]=host %PreferredToolArchitecture%"
-set "CMAKE_GENERATOR_TOOLSET[1]=host  %PreferredToolArchitecture%"
-set "CMAKE_GENERATOR_TOOLSET[2]=v141"
+set "CMAKE_GENERATOR_TOOLSET[0]=v142"

 REM Reduce build times and package size by removing unused stuff
 REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015
 set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^
-    -DLLVM_INCLUDE_TESTS=OFF ^
+    -DLLVM_ENABLE_PROJECTS:STRING=lld ^
     -DLLVM_INCLUDE_UTILS=ON ^
     -DLLVM_INCLUDE_DOCS=OFF ^
     -DLLVM_INCLUDE_EXAMPLES=OFF ^
@@ -67,7 +64,7 @@ for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do (
           -DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^
           -DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^
           -DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^
-          %CMAKE_CUSTOM% "%SRC_DIR%"
+          %CMAKE_CUSTOM% "%SRC_DIR%\llvm"
     if not errorlevel 1 goto configuration_successful
     del CMakeCache.txt
 )
diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh
index fd99eee90..2cc8464c6 100644
--- a/conda-recipes/llvmdev/build.sh
+++ b/conda-recipes/llvmdev/build.sh
@@ -15,10 +15,14 @@ else
     DARWIN_TARGET=x86_64-apple-darwin13.4.0
 fi

+mv llvm-*.src llvm
+mv lld-*.src lld
+mv unwind/libunwind-*.src libunwind

 declare -a _cmake_config
 _cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX})
 _cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release)
+_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld")
 # The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here
 # _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON)
 _cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON)
@@ -27,6 +31,7 @@ _cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON)
 _cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF)
 # Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to.
 _cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF)
+_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF)
 # Sometimes these are reported as unused. Whatever.
 _cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF)
 _cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF)
@@ -39,10 +44,10 @@ _cmake_config+=(-DLLVM_ENABLE_RTTI=OFF)
 _cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD})
 _cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly)
 _cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit
+_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project
 # TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to
 #         disable these unnecessary but useful things.
 if [[ ${CONDA_FORGE} == yes ]]; then
-  _cmake_config+=(-DLLVM_INCLUDE_TESTS=OFF)
   _cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF)
   _cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF)
 fi
@@ -76,7 +81,7 @@ cd build

 cmake -G'Unix Makefiles'     \
       "${_cmake_config[@]}"  \
-      ..
+      ../llvm

 ARCH=`uname -m`
 if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling
@@ -85,6 +90,8 @@ else
     make -j${CPU_COUNT} VERBOSE=1
 fi

+make check-llvm-unit || exit $?
+
 # From: https://github.com/conda-forge/llvmdev-feedstock/pull/53
 make install || exit $?

@@ -93,10 +100,3 @@ if [[ $ARCH == 'x86_64' ]]; then
    bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $?
 fi

-# run the tests, skip some on linux-32
-cd ../test
-if [[ $ARCH == 'i686' ]]; then
-    ../build/bin/llvm-lit -vv Transforms Analysis CodeGen/X86
-else
-    ../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86
-fi
diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml
index 27b596ffc..1a8e67032 100644
--- a/conda-recipes/llvmdev/meta.yaml
+++ b/conda-recipes/llvmdev/meta.yaml
@@ -1,8 +1,9 @@
-{% set shortversion = "11.1" %}
-{% set version = "11.1.0" %}
-{% set sha256_llvm = "ce8508e318a01a63d4e8b3090ab2ded3c598a50258cc49e2625b9120d4c03ea5" %}
-{% set sha256_lld = "017a788cbe1ecc4a949abf10755870519086d058a2e99f438829aef24f0c66ce" %}
-{% set build_number = "5" %}
+{% set shortversion = "14.0" %}
+{% set version = "14.0.6" %}
+{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %}
+{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %}
+{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %}
+{% set build_number = "0" %}

 package:
   name: llvmdev
@@ -13,20 +14,16 @@ source:
     fn: llvm-{{ version }}.src.tar.xz
     sha256: {{ sha256_llvm }}
     patches:
-    - ../partial-testing.patch
-    # Intel SVML optimizations (two patches)
-    - ../intel-D47188-svml-VF.patch
-    # Second patch from https://github.com/conda-forge/llvmdev-feedstock/blob/c706309/recipe/patches/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
-    - ../expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
-    # Reverts a patch limiting non-GlobalValue name length
-    - ../0001-Revert-Limit-size-of-non-GlobalValue-name.patch
-    # Fixes for aarch64 on LLVM 11 from https://reviews.llvm.org/D104123
-    - ../llvm_11_consecutive_registers.patch
-
+    - ../llvm14-remove-use-of-clonefile.patch
+    - ../llvm14-svml.patch
   - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz
     fn: lld-{{ version }}.src.tar.xz
     sha256: {{ sha256_lld }}
-    folder: tools/lld
+
+  - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz
+    fn: libunwind-{{ version }}.src.tar.xz
+    sha256: {{ sha256_libunwind }}
+    folder: unwind

 build:
   number: {{ build_number }}
@@ -81,5 +78,5 @@ about:
   home: http://llvm.org/
   dev_url: https://github.com/llvm-mirror/llvm
   license: NCSA
-  license_file: LICENSE.TXT
+  license_file: llvm/LICENSE.TXT
   summary: Development headers and libraries for LLVM
diff --git a/conda-recipes/llvmlite/bld.bat b/conda-recipes/llvmlite/bld.bat
index 475a0637c..d7342e249 100755
--- a/conda-recipes/llvmlite/bld.bat
+++ b/conda-recipes/llvmlite/bld.bat
@@ -12,11 +12,8 @@ if "%ARCH%"=="32" (
     @rem set CMAKE_GENERATOR_ARCH=Win64
     set CMAKE_GENERATOR_ARCH=x64
 )
-@rem for older VS:
-@rem set CMAKE_GENERATOR=Visual Studio 15 2017
-@rem do not set CMAKE_GENERATOR_TOOLKIT
 set CMAKE_GENERATOR=Visual Studio 16 2019
-set CMAKE_GENERATOR_TOOLKIT=v141
+set CMAKE_GENERATOR_TOOLKIT=v142

 @rem Ensure there are no build leftovers (CMake can complain)
 if exist ffi\build rmdir /S /Q ffi\build
diff --git a/conda-recipes/llvmlite/meta.yaml b/conda-recipes/llvmlite/meta.yaml
index bf083becd..ff897f7af 100644
--- a/conda-recipes/llvmlite/meta.yaml
+++ b/conda-recipes/llvmlite/meta.yaml
@@ -1,4 +1,4 @@
-{% set VERSION_SUFFIX = "" %} # debug version suffix, appended to the version
+{% set VERSION_SUFFIX = "llvm14" %} # debug version suffix, appended to the version

 package:
   name: llvmlite
@@ -23,19 +23,19 @@ requirements:
     # build.sh deals with it!
     - {{ compiler('c') }}    # [not (osx or armv6l or armv7l or win)]
     - {{ compiler('cxx') }}  # [not (osx or armv6l or armv7l or win)]
-    - vs2017_{{ target_platform  }}    # [win]
+    - vs2015_{{ target_platform  }}    # [win]
     # The DLL build uses cmake on Windows
     - cmake          # [win]
     - make           # [unix and not (armv6l or armv7l or aarch64)]
   host:
     - python
     # On channel https://anaconda.org/numba/
-    - llvmdev 11.1.0 *5 # [(osx and arm64)]
-    - llvmdev 11.1.0 *4 # [not ((osx and arm64) or win)]
-    - llvmdev 11.1.0 4 # [win]
+    - llvmdev 14
     - vs2015_runtime # [win]
     # llvmdev is built with libz compression support
     - zlib           # [unix and not (armv6l or armv7l)]
+    # requires libxml2
+    - libxml2        # [win]
   run:
     - python >=3.8,<3.10
     - vs2015_runtime # [win]
diff --git a/ffi/Makefile.freebsd b/ffi/Makefile.freebsd
index ba727e331..7b869e876 100644
--- a/ffi/Makefile.freebsd
+++ b/ffi/Makefile.freebsd
@@ -1,5 +1,5 @@

-CXX = clang++ -std=c++11 -stdlib=libc++
+CXX = clang++ -stdlib=libc++

 # -flto and --exclude-libs allow us to remove those parts of LLVM we don't use
 CXX_FLTO_FLAGS ?= -flto
diff --git a/ffi/Makefile.osx b/ffi/Makefile.osx
index bc192071e..74dccf32c 100644
--- a/ffi/Makefile.osx
+++ b/ffi/Makefile.osx
@@ -1,6 +1,6 @@

-CXX = clang++ -std=c++11 -stdlib=libc++
-CXXFLAGS = $(LLVM_CXXFLAGS)
+CXX = clang++
+CXXFLAGS = $(LLVM_CXXFLAGS) -O3
 # Only export the LLVMPY symbols we require and exclude everything else.
 EXPORT = "-Wl,-exported_symbol,_LLVMPY_*"
 LDFLAGS :=  $(LDFLAGS) $(EXPORT) $(LLVM_LDFLAGS)
diff --git a/ffi/build.py b/ffi/build.py
index 55343fca5..e58a691e0 100755
--- a/ffi/build.py
+++ b/ffi/build.py
@@ -72,10 +72,10 @@ def find_windows_generator():
         )

     generators.extend([
-        # use VS2017 toolkit on VS2019 to match how llvmdev is built
-        ('Visual Studio 16 2019', ('x64' if is_64bit else 'Win32'), 'v141'),
-        # This is the generator configuration for VS2017
-        ('Visual Studio 15 2017' + (' Win64' if is_64bit else ''), None, None)
+        # use VS2019 to match how llvmdev is built
+        ('Visual Studio 16 2019', ('x64' if is_64bit else 'Win32'), 'v142'),
+        # # This is the generator configuration for VS2017
+        # ('Visual Studio 15 2017' + (' Win64' if is_64bit else ''), None, None)
     ])
     for generator in generators:
         build_dir = tempfile.mkdtemp()
@@ -163,9 +163,10 @@ def main_posix(kind, library_ext):
         print(msg)
         print(warning + '\n')
     else:
-
-        if not out.startswith('11'):
-            msg = ("Building llvmlite requires LLVM 11.x.x, got "
+        (version, _) = out.split('.', 1)
+        version = int(version)
+        if version < 11 or version > 14:
+            msg = ("Building llvmlite requires LLVM 11, 12, 13, or 14, got "
                    "{!r}. Be sure to set LLVM_CONFIG to the right executable "
                    "path.\nRead the documentation at "
                    "http://llvmlite.pydata.org/ for more information about "
diff --git a/ffi/passmanagers.cpp b/ffi/passmanagers.cpp
index dd67ca5cc..60064cf10 100644
--- a/ffi/passmanagers.cpp
+++ b/ffi/passmanagers.cpp
@@ -16,11 +16,8 @@

 #include "llvm-c/Transforms/IPO.h"
 #include "llvm-c/Transforms/Scalar.h"
-#include "llvm/IR/LegacyPassManager.h"
-#if LLVM_VERSION_MAJOR > 11
-#include "llvm/IR/RemarkStreamer.h"
-#endif
 #include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Remarks/RemarkStreamer.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
@@ -220,7 +217,11 @@ LLVMPY_AddLazyValueInfoPass(LLVMPassManagerRef PM) {
 }
 API_EXPORT(void)
 LLVMPY_AddLintPass(LLVMPassManagerRef PM) {
+#if LLVM_VERSION_MAJOR < 12
     unwrap(PM)->add(llvm::createLintPass());
+#else
+    unwrap(PM)->add(llvm::createLintLegacyPassPass());
+#endif
 }
 API_EXPORT(void)
 LLVMPY_AddModuleDebugInfoPrinterPass(LLVMPassManagerRef PM) {
diff --git a/ffi/targets.cpp b/ffi/targets.cpp
index 3b5abf510..b96d22c9f 100644
--- a/ffi/targets.cpp
+++ b/ffi/targets.cpp
@@ -6,7 +6,11 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Host.h"
+#if LLVM_VERSION_MAJOR > 13
+#include "llvm/MC/TargetRegistry.h"
+#else
 #include "llvm/Support/TargetRegistry.h"
+#endif
 #include "llvm/Target/TargetMachine.h"

 #include <cstdio>
@@ -204,7 +208,11 @@ LLVMPY_CreateTargetMachine(LLVMTargetRef T, const char *Triple, const char *CPU,
         rm = Reloc::DynamicNoPIC;

     TargetOptions opt;
+#if LLVM_VERSION_MAJOR < 12
     opt.PrintMachineCode = PrintMC;
+#else
+    opt.MCOptions.ShowMCInst = PrintMC;
+#endif
     opt.MCOptions.ABIName = ABIName;

     bool jit = JIT;
diff --git a/ffi/value.cpp b/ffi/value.cpp
index 771acd423..01871699d 100644
--- a/ffi/value.cpp
+++ b/ffi/value.cpp
@@ -153,8 +153,13 @@ LLVMPY_ArgumentAttributesIter(LLVMValueRef A) {
     using namespace llvm;
     Argument *arg = unwrap<Argument>(A);
     unsigned argno = arg->getArgNo();
-    AttributeSet attrs =
-        arg->getParent()->getAttributes().getParamAttributes(argno);
+    const AttributeSet attrs = arg->getParent()->getAttributes().
+#if LLVM_VERSION_MAJOR < 14
+                               getParamAttributes(argno)
+#else
+                               getParamAttrs(argno)
+#endif
+        ;
     return wrap(new AttributeSetIterator(attrs.begin(), attrs.end()));
 }

@@ -353,7 +358,11 @@ LLVMPY_GetElementType(LLVMTypeRef type) {
     llvm::Type *unwrapped = llvm::unwrap(type);
     llvm::PointerType *ty = llvm::dyn_cast<llvm::PointerType>(unwrapped);
     if (ty != nullptr) {
+#if LLVM_VERSION_MAJOR < 14
         return llvm::wrap(ty->getElementType());
+#else
+        return llvm::wrap(ty->getPointerElementType());
+#endif
     }
     return nullptr;
 }
diff --git a/llvmlite/binding/passmanagers.py b/llvmlite/binding/passmanagers.py
index 26f7bd259..4b9daf468 100644
--- a/llvmlite/binding/passmanagers.py
+++ b/llvmlite/binding/passmanagers.py
@@ -199,7 +199,8 @@ def add_lint_pass(self):
         """
         See https://llvm.org/docs/Passes.html#lint-statically-lint-checks-llvm-ir

-        LLVM 11+: `llvm::createLintPass`
+        LLVM 11: `llvm::createLintPass`
+        LLVM 12+: `llvm::createLintLegacyPassPass`
         """  # noqa E501
         ffi.lib.LLVMPY_AddLintPass(self)

diff --git a/llvmlite/tests/test_binding.py b/llvmlite/tests/test_binding.py
index dc4dbc484..70902e04c 100644
--- a/llvmlite/tests/test_binding.py
+++ b/llvmlite/tests/test_binding.py
@@ -640,7 +640,7 @@ def test_set_option(self):
     def test_version(self):
         major, minor, patch = llvm.llvm_version_info
         # one of these can be valid
-        valid = [(11,)]
+        valid = [(11,), (12, ), (13, ), (14, )]
         self.assertIn((major,), valid)
         self.assertIn(patch, range(10))

From 355338e931f488926b07a2f6eaf83ecd39e9abb7 Mon Sep 17 00:00:00 2001
From: Andre Masella <andre@masella.name>
Date: Thu, 8 Dec 2022 17:26:05 -0500
Subject: [PATCH] Automatically detect common return blocks in ref prune

Change reference pruning algorithm to detect when a common return block is
generated and determine if it returns non-zero, indicating an exception path.
LLVM 14 automatically generates code like this.
---
 ffi/custom_passes.cpp           | 83 ++++++++++++++++++++++++---------
 llvmlite/tests/test_refprune.py | 25 +++++++++-
 2 files changed, 85 insertions(+), 23 deletions(-)

diff --git a/ffi/custom_passes.cpp b/ffi/custom_passes.cpp
index 21e0bbcff..a04b4b64c 100644
--- a/ffi/custom_passes.cpp
+++ b/ffi/custom_passes.cpp
@@ -905,9 +905,7 @@ struct RefPrunePass : public FunctionPass {
     }

     /**
-     * Check if a basic block is a block which raises, this relies on a
-     * metadata "ret_is_raise" being present the terminator and the
-     * terminator opcode being Instruction::Ret.
+     * Check if a basic block is a block which raises, based on the return value.
      *
      * Parameters:
      *  - bb a basic block
@@ -920,27 +918,68 @@ struct RefPrunePass : public FunctionPass {

         // Get the terminator
         auto term = bb->getTerminator();
-        // Get the opcode of the terminator, if it's not a Ret then return false
-        if (term->getOpcode() != Instruction::Ret)
-            return false;
-        // Get the metadata on the terminator node
-        auto md = term->getMetadata("ret_is_raise");
-        // If there's no metadata return false (normal or unmarked Ret)
-        if (!md)
-            return false;
-        // If the number of operands on the metadata is not 1 then return false
-        if (md->getNumOperands() != 1)
+        // Get the opcode of the terminator, if it's a Ret then check
+        if (term->getOpcode() == Instruction::Ret) {
+            // With one operand
+            if (term->getNumOperands() != 1) {
+                return false;
+            }
+            auto operand = term->getOperand(0);
+            // If the operand is a constant, check if it indicates an exception
+            auto int_operand = dyn_cast<ConstantInt>(operand);
+            if (int_operand && int_operand->isOneValue()) {
+                return true;
+            }
+            // If the operand is a PHI node, check if there is a non-exception
+            // path. We don't know which path we're on, but since the
+            // exceptional path will look ahead, if there is a non-exceptional
+            // path, we can assume we're on it.
+            auto phi_operand = dyn_cast<PHINode>(operand);
+            if (phi_operand) {
+                for (auto& phi_arg_value : phi_operand->incoming_values()) {
+                    auto arg_value = dyn_cast<ConstantInt>(phi_arg_value);
+                    if (arg_value && !arg_value->isOneValue()) {
+                        return false;
+                    }
+                }
+                return true;
+            }
             return false;
-        // Fetch the ref to the metadata operand at location 0
-        auto &operand = md->getOperand(0);
-        // and then cast the const as Metadata (Numba sets this as literal 1)
-        auto data = dyn_cast<ConstantAsMetadata>(operand.get());
-        // If dyn_cast failed type check then return false
-        if (!data)
+        } else if (term->getOpcode() == Instruction::Br &&
+                   term->getNumOperands() == 1) {
+            // If it's a branch, it might be a common return block
+            auto first =
+                term->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(true);
+            if (!first) {
+                // Malformed block with no terminal instruction
+                return false;
+            }
+            // Our one and only instruction should be a return
+            if (first->getOpcode() != Instruction::Ret) {
+                return false;
+            }
+            // With one operand
+            if (first->getNumOperands() != 1) {
+                return false;
+            }
+            auto operand = first->getOperand(0);
+            // If the operand is a constant, check if it indicates an exception
+            auto int_operand = dyn_cast<ConstantInt>(operand);
+            if (int_operand && int_operand->isOneValue()) {
+                return true;
+            }
+            // If the operand is a PHI node, check if the path we're on will
+            // yield a value indicating an exception
+            auto phi_operand = dyn_cast<PHINode>(operand);
+            if (phi_operand) {
+                auto arg_value = dyn_cast<ConstantInt>(
+                    phi_operand->getIncomingValueForBlock(bb));
+                return arg_value && arg_value->isOneValue();
+            }
+            // This path doesn't raise
             return false;
-        // get the value of the casted metadata and then return bool on whether
-        // it is the number one.
-        return data->getValue()->isOneValue();
+        }
+        return false;
     }

     /**
diff --git a/llvmlite/tests/test_refprune.py b/llvmlite/tests/test_refprune.py
index d4f7b3035..ba53ab1ff 100644
--- a/llvmlite/tests/test_refprune.py
+++ b/llvmlite/tests/test_refprune.py
@@ -456,7 +456,8 @@ def test_fanout_raise_1(self):

     def test_fanout_raise_2(self):
         mod, stats = self.check(self.fanout_raise_2)
-        self.assertEqual(stats.fanout_raise, 0)
+        # Change in behaviour: ignore bad metadata
+        self.assertEqual(stats.fanout_raise, 2)

     fanout_raise_3 = r"""
 define i32 @main(i8* %ptr, i1 %cond) {
@@ -495,6 +496,28 @@ def test_fanout_raise_4(self):
         mod, stats = self.check(self.fanout_raise_4)
         self.assertEqual(stats.fanout_raise, 0)

+    fanout_raise_5 = r"""
+define i32 @main(i8* %ptr, i1 %cond) {
+bb_A:
+    call void @NRT_incref(i8* %ptr)
+    br i1 %cond, label %bb_B, label %bb_C
+bb_B:
+    call void @NRT_decref(i8* %ptr)
+    br label %common.ret
+bb_C:
+    br label %common.ret       ; pretend we throw an exception
+common.ret:
+    %common.ret.op = phi i32 [ 0, %bb_B ], [ 1, %bb_C ]
+    ret i32 %common.ret.op
+}
+
+!0 = !{i32 1}
+"""
+
+    def test_fanout_raise_5(self):
+        mod, stats = self.check(self.fanout_raise_5)
+        self.assertEqual(stats.fanout_raise, 2)
+

 if __name__ == '__main__':
     unittest.main()