3177 lines
131 KiB
Diff
3177 lines
131 KiB
Diff
From d143023557a117d4cad4b4785ac3e4bb36264e3e Mon Sep 17 00:00:00 2001
|
|
From: Andre Masella <andre@masella.name>
|
|
Date: Tue, 5 Apr 2022 15:22:21 -0400
|
|
Subject: [PATCH] Update to LLVM 12-14
|
|
|
|
Modify llvmlite to support LLVM 11-14 and modify conda recipe to build LLVM14.
|
|
Also lift over all patches to LLVM versions as required.
|
|
---
|
|
...-Limit-size-of-non-GlobalValue-name.patch} | 0
|
|
...tch => llvm11-consecutive_registers.patch} | 0
|
|
...-entrypoints-in-add-TLI-mappings.ll.patch} | 0
|
|
...atch => llvm11-intel-D47188-svml-VF.patch} | 0
|
|
...o-static.patch => llvm11-lto-static.patch} | 0
|
|
...ing.patch => llvm11-partial-testing.patch} | 0
|
|
...t-Limit-size-of-non-GlobalValue-name.patch | 49 +
|
|
.../llvm12-consecutive_registers.patch | 181 ++
|
|
conda-recipes/llvm12-lto-static.patch | 12 +
|
|
conda-recipes/llvm13-lto-static.patch | 12 +
|
|
.../llvm14-remove-use-of-clonefile.patch | 54 +
|
|
conda-recipes/llvm14-svml.patch | 2192 +++++++++++++++++
|
|
conda-recipes/llvmdev/bld.bat | 35 +-
|
|
conda-recipes/llvmdev/build.sh | 18 +-
|
|
conda-recipes/llvmdev/meta.yaml | 31 +-
|
|
conda-recipes/llvmlite/bld.bat | 5 +-
|
|
conda-recipes/llvmlite/meta.yaml | 10 +-
|
|
ffi/Makefile.freebsd | 2 +-
|
|
ffi/Makefile.osx | 4 +-
|
|
ffi/build.py | 15 +-
|
|
ffi/passmanagers.cpp | 9 +-
|
|
ffi/targets.cpp | 8 +
|
|
ffi/value.cpp | 13 +-
|
|
llvmlite/binding/passmanagers.py | 3 +-
|
|
llvmlite/tests/test_binding.py | 2 +-
|
|
25 files changed, 2583 insertions(+), 72 deletions(-)
|
|
rename conda-recipes/{0001-Revert-Limit-size-of-non-GlobalValue-name.patch => llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch} (100%)
|
|
rename conda-recipes/{llvm_11_consecutive_registers.patch => llvm11-consecutive_registers.patch} (100%)
|
|
rename conda-recipes/{expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch => llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch} (100%)
|
|
rename conda-recipes/{intel-D47188-svml-VF.patch => llvm11-intel-D47188-svml-VF.patch} (100%)
|
|
rename conda-recipes/{llvm-lto-static.patch => llvm11-lto-static.patch} (100%)
|
|
rename conda-recipes/{partial-testing.patch => llvm11-partial-testing.patch} (100%)
|
|
create mode 100644 conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
create mode 100644 conda-recipes/llvm12-consecutive_registers.patch
|
|
create mode 100644 conda-recipes/llvm12-lto-static.patch
|
|
create mode 100644 conda-recipes/llvm13-lto-static.patch
|
|
create mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch
|
|
create mode 100644 conda-recipes/llvm14-svml.patch
|
|
|
|
diff --git a/conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
rename to conda-recipes/llvm11-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
diff --git a/conda-recipes/llvm_11_consecutive_registers.patch b/conda-recipes/llvm11-consecutive_registers.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/llvm_11_consecutive_registers.patch
|
|
rename to conda-recipes/llvm11-consecutive_registers.patch
|
|
diff --git a/conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch b/conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
|
|
rename to conda-recipes/llvm11-expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
|
|
diff --git a/conda-recipes/intel-D47188-svml-VF.patch b/conda-recipes/llvm11-intel-D47188-svml-VF.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/intel-D47188-svml-VF.patch
|
|
rename to conda-recipes/llvm11-intel-D47188-svml-VF.patch
|
|
diff --git a/conda-recipes/llvm-lto-static.patch b/conda-recipes/llvm11-lto-static.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/llvm-lto-static.patch
|
|
rename to conda-recipes/llvm11-lto-static.patch
|
|
diff --git a/conda-recipes/partial-testing.patch b/conda-recipes/llvm11-partial-testing.patch
|
|
similarity index 100%
|
|
rename from conda-recipes/partial-testing.patch
|
|
rename to conda-recipes/llvm11-partial-testing.patch
|
|
diff --git a/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
new file mode 100644
|
|
index 000000000..9b722d36c
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm12-0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
@@ -0,0 +1,49 @@
|
|
+diff -ur a/lib/IR/Value.cpp b/lib/IR/Value.cpp
|
|
+--- a/lib/IR/Value.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/IR/Value.cpp 2022-03-31 15:39:31.000000000 -0400
|
|
+@@ -38,10 +38,6 @@
|
|
+
|
|
+ using namespace llvm;
|
|
+
|
|
+-static cl::opt<unsigned> NonGlobalValueMaxNameSize(
|
|
+- "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
|
|
+- cl::desc("Maximum size for the name of non-global values."));
|
|
+-
|
|
+ //===----------------------------------------------------------------------===//
|
|
+ // Value Class
|
|
+ //===----------------------------------------------------------------------===//
|
|
+@@ -319,11 +315,6 @@
|
|
+ if (getName() == NameRef)
|
|
+ return;
|
|
+
|
|
+- // Cap the size of non-GlobalValue names.
|
|
+- if (NameRef.size() > NonGlobalValueMaxNameSize && !isa<GlobalValue>(this))
|
|
+- NameRef =
|
|
+- NameRef.substr(0, std::max(1u, (unsigned)NonGlobalValueMaxNameSize));
|
|
+-
|
|
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
|
|
+
|
|
+ // Get the symbol table to update for this object.
|
|
+diff -ur a/test/Bitcode/value-with-long-name.ll b/test/Bitcode/value-with-long-name.ll
|
|
+deleted file mode 100644
|
|
+--- a/test/Bitcode/value-with-long-name.ll
|
|
++++ /dev/null
|
|
+@@ -1,18 +0,0 @@
|
|
+-; Check the size of generated variable when no option is set
|
|
+-; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s
|
|
+-; CHECK-LONG: %{{[a-z]{4}[a-z]+}}
|
|
+-
|
|
+-; Then check we correctly cap the size of newly generated non-global values name
|
|
+-; Force the size to be small so that the check works on release and debug build
|
|
+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s
|
|
+-; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s
|
|
+-; CHECK-SHORT-NOT: %{{[a-z][a-z]+}}
|
|
+-
|
|
+-define i32 @f(i32 %a, i32 %b) {
|
|
+- %c = add i32 %a, %b
|
|
+- %d = add i32 %c, %a
|
|
+- %e = add i32 %d, %b
|
|
+- ret i32 %e
|
|
+-}
|
|
+-
|
|
+-
|
|
diff --git a/conda-recipes/llvm12-consecutive_registers.patch b/conda-recipes/llvm12-consecutive_registers.patch
|
|
new file mode 100644
|
|
index 000000000..cc60217bd
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm12-consecutive_registers.patch
|
|
@@ -0,0 +1,181 @@
|
|
+diff -ur a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
|
|
+--- a/include/llvm/CodeGen/TargetLowering.h 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/include/llvm/CodeGen/TargetLowering.h 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -3975,7 +3975,8 @@
|
|
+ /// must be passed in a block of consecutive registers.
|
|
+ virtual bool
|
|
+ functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
|
|
+- bool isVarArg) const {
|
|
++ bool isVarArg,
|
|
++ const DataLayout &DL) const {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+diff -ur a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
|
|
+--- a/lib/CodeGen/SelectionDAG/FastISel.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/CodeGen/SelectionDAG/FastISel.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -1087,7 +1087,7 @@
|
|
+ if (Arg.IsByVal)
|
|
+ FinalType = cast<PointerType>(Arg.Ty)->getElementType();
|
|
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
|
|
+- FinalType, CLI.CallConv, CLI.IsVarArg);
|
|
++ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
|
|
+
|
|
+ ISD::ArgFlagsTy Flags;
|
|
+ if (Arg.IsZExt)
|
|
+diff -ur a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
|
|
+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -1851,7 +1851,7 @@
|
|
+
|
|
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
|
|
+ I.getOperand(0)->getType(), F->getCallingConv(),
|
|
+- /*IsVarArg*/ false);
|
|
++ /*IsVarArg*/ false, DL);
|
|
+
|
|
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
|
|
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
|
|
+@@ -9229,7 +9229,7 @@
|
|
+ CLI.IsTailCall = false;
|
|
+ } else {
|
|
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
|
|
+- CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
|
|
++ CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
|
|
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
|
|
+ ISD::ArgFlagsTy Flags;
|
|
+ if (NeedsRegBlock) {
|
|
+@@ -9289,7 +9289,7 @@
|
|
+ if (Args[i].IsByVal)
|
|
+ FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
|
|
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
|
|
+- FinalType, CLI.CallConv, CLI.IsVarArg);
|
|
++ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
|
|
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
|
|
+ ++Value) {
|
|
+ EVT VT = ValueVTs[Value];
|
|
+@@ -9830,7 +9830,7 @@
|
|
+ if (Arg.hasAttribute(Attribute::ByVal))
|
|
+ FinalType = Arg.getParamByValType();
|
|
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
|
|
+- FinalType, F.getCallingConv(), F.isVarArg());
|
|
++ FinalType, F.getCallingConv(), F.isVarArg(), DL);
|
|
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
|
|
+ Value != NumValues; ++Value) {
|
|
+ EVT VT = ValueVTs[Value];
|
|
+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
|
|
+--- a/lib/Target/AArch64/AArch64ISelLowering.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/AArch64/AArch64ISelLowering.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -30,6 +30,7 @@
|
|
+ #include "llvm/ADT/Triple.h"
|
|
+ #include "llvm/ADT/Twine.h"
|
|
+ #include "llvm/Analysis/VectorUtils.h"
|
|
++#include "llvm/CodeGen/Analysis.h"
|
|
+ #include "llvm/CodeGen/CallingConvLower.h"
|
|
+ #include "llvm/CodeGen/MachineBasicBlock.h"
|
|
+ #include "llvm/CodeGen/MachineFrameInfo.h"
|
|
+@@ -16455,15 +16456,17 @@
|
|
+ }
|
|
+
|
|
+ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
|
+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
|
+- if (Ty->isArrayTy())
|
|
+- return true;
|
|
+-
|
|
+- const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
|
|
+- if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
|
|
+- return true;
|
|
++ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
|
|
++ const DataLayout &DL) const {
|
|
++ if (!Ty->isArrayTy()) {
|
|
++ const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
|
|
++ return TySize.isScalable() && TySize.getKnownMinSize() > 128;
|
|
++ }
|
|
+
|
|
+- return false;
|
|
++ // All non aggregate members of the type must have the same type
|
|
++ SmallVector<EVT, 0> ValueVTs;
|
|
++ ComputeValueVTs(*this, DL, Ty, ValueVTs);
|
|
++ return is_splat(ValueVTs);
|
|
+ }
|
|
+
|
|
+ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
|
|
+diff -ur a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
|
|
+--- a/lib/Target/AArch64/AArch64ISelLowering.h 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/AArch64/AArch64ISelLowering.h 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -770,9 +770,10 @@
|
|
+ MachineMemOperand::Flags getTargetMMOFlags(
|
|
+ const Instruction &I) const override;
|
|
+
|
|
+- bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
|
|
+- CallingConv::ID CallConv,
|
|
+- bool isVarArg) const override;
|
|
++ bool functionArgumentNeedsConsecutiveRegisters(
|
|
++ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
|
|
++ const DataLayout &DL) const override;
|
|
++
|
|
+ /// Used for exception handling on Win64.
|
|
+ bool needsFixedCatchObjects() const override;
|
|
+
|
|
+diff -ur a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
|
|
+--- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -259,7 +259,7 @@
|
|
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
|
|
+
|
|
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
|
|
+- OrigArg.Ty, CallConv, false);
|
|
++ OrigArg.Ty, CallConv, false, DL);
|
|
+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
|
|
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
|
|
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
|
|
+diff -ur a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
|
|
+--- a/lib/Target/ARM/ARMCallLowering.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/ARM/ARMCallLowering.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -218,7 +218,7 @@
|
|
+
|
|
+ bool NeedsConsecutiveRegisters =
|
|
+ TLI.functionArgumentNeedsConsecutiveRegisters(
|
|
+- SplitTy, F.getCallingConv(), F.isVarArg());
|
|
++ SplitTy, F.getCallingConv(), F.isVarArg(), DL);
|
|
+ if (NeedsConsecutiveRegisters) {
|
|
+ Flags.setInConsecutiveRegs();
|
|
+ if (i == e - 1)
|
|
+diff -ur a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
|
|
+--- a/lib/Target/ARM/ARMISelLowering.cpp 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/ARM/ARMISelLowering.cpp 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -19269,7 +19269,8 @@
|
|
+ /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
|
|
+ /// passing according to AAPCS rules.
|
|
+ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
|
+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
|
++ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
|
|
++ const DataLayout &DL) const {
|
|
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
|
|
+ CallingConv::ARM_AAPCS_VFP)
|
|
+ return false;
|
|
+diff -ur a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
|
|
+--- a/lib/Target/ARM/ARMISelLowering.h 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/ARM/ARMISelLowering.h 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -578,7 +578,8 @@
|
|
+ /// Returns true if an argument of type Ty needs to be passed in a
|
|
+ /// contiguous block of registers in calling convention CallConv.
|
|
+ bool functionArgumentNeedsConsecutiveRegisters(
|
|
+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
|
|
++ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
|
|
++ const DataLayout &DL) const override;
|
|
+
|
|
+ /// If a physical register, this returns the register that receives the
|
|
+ /// exception address on entry to an EH pad.
|
|
+diff -ur a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
|
|
+--- a/lib/Target/PowerPC/PPCISelLowering.h 2021-04-06 12:38:18.000000000 -0400
|
|
++++ b/lib/Target/PowerPC/PPCISelLowering.h 2022-03-31 15:52:45.000000000 -0400
|
|
+@@ -998,7 +998,8 @@
|
|
+ /// Returns true if an argument of type Ty needs to be passed in a
|
|
+ /// contiguous block of registers in calling convention CallConv.
|
|
+ bool functionArgumentNeedsConsecutiveRegisters(
|
|
+- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
|
|
++ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
|
|
++ const DataLayout &DL) const override {
|
|
+ // We support any array type as "consecutive" block in the parameter
|
|
+ // save area. The element type defines the alignment requirement and
|
|
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
|
|
diff --git a/conda-recipes/llvm12-lto-static.patch b/conda-recipes/llvm12-lto-static.patch
|
|
new file mode 100644
|
|
index 000000000..76cc55def
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm12-lto-static.patch
|
|
@@ -0,0 +1,12 @@
|
|
+diff -ur a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
|
|
+--- llvm-12.0.0.src-orig/tools/lto/CMakeLists.txt 2021-04-06 12:38:18.000000000 -0400
|
|
++++ llvm-12.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 15:46:00.000000000 -0400
|
|
+@@ -21,7 +21,7 @@
|
|
+
|
|
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports)
|
|
+
|
|
+-add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS
|
|
++add_llvm_library(LTO INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS
|
|
+ intrinsics_gen)
|
|
+
|
|
+ install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h
|
|
diff --git a/conda-recipes/llvm13-lto-static.patch b/conda-recipes/llvm13-lto-static.patch
|
|
new file mode 100644
|
|
index 000000000..b8a624250
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm13-lto-static.patch
|
|
@@ -0,0 +1,12 @@
|
|
+diff -ur llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt llvm-13.0.0.src/tools/lto/CMakeLists.txt
|
|
+--- llvm-13.0.0.src-orig/tools/lto/CMakeLists.txt 2021-09-24 12:18:10.000000000 -0400
|
|
++++ llvm-13.0.0.src/tools/lto/CMakeLists.txt 2022-03-31 17:07:07.000000000 -0400
|
|
+@@ -25,7 +25,7 @@
|
|
+ set(LTO_LIBRARY_TYPE MODULE)
|
|
+ set(LTO_LIBRARY_NAME libLTO)
|
|
+ else()
|
|
+- set(LTO_LIBRARY_TYPE SHARED)
|
|
++ set(LTO_LIBRARY_TYPE STATIC)
|
|
+ set(LTO_LIBRARY_NAME LTO)
|
|
+ endif()
|
|
+
|
|
diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch
|
|
new file mode 100644
|
|
index 000000000..6ef9c9d61
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm14-remove-use-of-clonefile.patch
|
|
@@ -0,0 +1,54 @@
|
|
+diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc
|
|
+--- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-03-14 05:44:55.000000000 -0400
|
|
++++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-09-19 11:30:59.000000000 -0400
|
|
+@@ -1462,6 +1462,7 @@
|
|
+ std::error_code copy_file(const Twine &From, const Twine &To) {
|
|
+ std::string FromS = From.str();
|
|
+ std::string ToS = To.str();
|
|
++ /*
|
|
+ #if __has_builtin(__builtin_available)
|
|
+ if (__builtin_available(macos 10.12, *)) {
|
|
+ // Optimistically try to use clonefile() and handle errors, rather than
|
|
+@@ -1490,6 +1491,7 @@
|
|
+ // cheaper.
|
|
+ }
|
|
+ #endif
|
|
++ */
|
|
+ if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA))
|
|
+ return std::error_code();
|
|
+ return std::error_code(errno, std::generic_category());
|
|
+diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp
|
|
+--- a/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-03-14 05:44:55.000000000 -0400
|
|
++++ b/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-09-19 11:33:07.000000000 -0400
|
|
+@@ -2267,15 +2267,15 @@
|
|
+
|
|
+ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError);
|
|
+ EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe));
|
|
+-
|
|
++#if !defined(__APPLE__)
|
|
+ EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError);
|
|
+ EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe));
|
|
+-
|
|
++#endif
|
|
+ // Modern BSDs require root to set the sticky bit on files.
|
|
+ // AIX and Solaris without root will mask off (i.e., lose) the sticky bit
|
|
+ // on files.
|
|
+ #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && \
|
|
+- !defined(_AIX) && !(defined(__sun__) && defined(__svr4__))
|
|
++ !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__)
|
|
+ EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError);
|
|
+ EXPECT_TRUE(CheckPermissions(fs::sticky_bit));
|
|
+
|
|
+@@ -2297,10 +2297,12 @@
|
|
+ EXPECT_TRUE(CheckPermissions(fs::all_perms));
|
|
+ #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX
|
|
+
|
|
++#if !defined(__APPLE__)
|
|
+ EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit),
|
|
+ NoError);
|
|
+ EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit));
|
|
+ #endif
|
|
++#endif
|
|
+ }
|
|
+
|
|
+ #ifdef _WIN32
|
|
diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch
|
|
new file mode 100644
|
|
index 000000000..cdce26b34
|
|
--- /dev/null
|
|
+++ b/conda-recipes/llvm14-svml.patch
|
|
@@ -0,0 +1,2192 @@
|
|
+From bc2dcd190b7148d04772fa7fcd18b5200b758d4a Mon Sep 17 00:00:00 2001
|
|
+From: Ivan Butygin <ivan.butygin@gmail.com>
|
|
+Date: Sun, 24 Jul 2022 20:31:29 +0200
|
|
+Subject: [PATCH] Fixes vectorizer and extends SVML support
|
|
+
|
|
+Patch was updated to fix SVML calling convention issues uncovered by llvm 10.
|
|
+In previous versions of the patch, the SVML calling convention was selected based on
|
|
+compilation settings. So if you try to call a 256-bit vector function from avx512
|
|
+code, the function will be called with the avx512 cc, which is incorrect. To fix this
|
|
+SVML cc was separated into 3 different cc for 128, 256 and 512bit vector lengths
|
|
+which are selected based on actual input vector length.
|
|
+
|
|
+Original patch merged several fixes:
|
|
+
|
|
+1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls
|
|
+to SVML library as it has non-standard calling conventions. So accordingly it
|
|
+has SVML calling conventions definitions and code to set CC to the vectorized
|
|
+calls. As SVML provides several implementations for the math functions we also
|
|
+took the fast attribute into consideration and select the faster implementation in
|
|
+such case. This work is based on original Matt Masten's work.
|
|
+Author: Denis Nagorny
|
|
+
|
|
+2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML
|
|
+calls by breaking down the illegal vector call instruction into multiple legal
|
|
+vector call instructions during code generation. Currently the vectorizer does
|
|
+not check legality of the generated SVML (or any VECLIB) call instructions, and
|
|
+this can lead to potential problems even during vector type legalization. This
|
|
+patch addresses this issue by adding a legality check during code generation and
|
|
+replaces the illegal SVML call with corresponding legalized instructions.
|
|
+(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html)
|
|
+Author: Karthik Senthil
|
|
+---
|
|
+ .../include/llvm/Analysis/TargetLibraryInfo.h | 22 +-
|
|
+ llvm/include/llvm/AsmParser/LLToken.h | 3 +
|
|
+ llvm/include/llvm/IR/CMakeLists.txt | 4 +
|
|
+ llvm/include/llvm/IR/CallingConv.h | 5 +
|
|
+ llvm/include/llvm/IR/SVML.td | 62 +++
|
|
+ llvm/lib/Analysis/CMakeLists.txt | 1 +
|
|
+ llvm/lib/Analysis/TargetLibraryInfo.cpp | 55 +-
|
|
+ llvm/lib/AsmParser/LLLexer.cpp | 3 +
|
|
+ llvm/lib/AsmParser/LLParser.cpp | 6 +
|
|
+ llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 2 +-
|
|
+ llvm/lib/IR/AsmWriter.cpp | 3 +
|
|
+ llvm/lib/IR/Verifier.cpp | 3 +
|
|
+ llvm/lib/Target/X86/X86CallingConv.td | 70 +++
|
|
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +-
|
|
+ llvm/lib/Target/X86/X86RegisterInfo.cpp | 46 ++
|
|
+ llvm/lib/Target/X86/X86Subtarget.h | 3 +
|
|
+ .../Transforms/Utils/InjectTLIMappings.cpp | 2 +-
|
|
+ .../Transforms/Vectorize/LoopVectorize.cpp | 269 +++++++++
|
|
+ .../Generic/replace-intrinsics-with-veclib.ll | 4 +-
|
|
+ .../LoopVectorize/X86/svml-calls-finite.ll | 24 +-
|
|
+ .../LoopVectorize/X86/svml-calls.ll | 108 ++--
|
|
+ .../LoopVectorize/X86/svml-legal-calls.ll | 513 ++++++++++++++++++
|
|
+ .../LoopVectorize/X86/svml-legal-codegen.ll | 61 +++
|
|
+ llvm/test/Transforms/Util/add-TLI-mappings.ll | 18 +-
|
|
+ llvm/utils/TableGen/CMakeLists.txt | 1 +
|
|
+ llvm/utils/TableGen/SVMLEmitter.cpp | 110 ++++
|
|
+ llvm/utils/TableGen/TableGen.cpp | 8 +-
|
|
+ llvm/utils/TableGen/TableGenBackends.h | 1 +
|
|
+ llvm/utils/vim/syntax/llvm.vim | 1 +
|
|
+ 29 files changed, 1341 insertions(+), 70 deletions(-)
|
|
+ create mode 100644 llvm/include/llvm/IR/SVML.td
|
|
+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
|
|
+ create mode 100644 llvm/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
|
|
+ create mode 100644 llvm/utils/TableGen/SVMLEmitter.cpp
|
|
+
|
|
+diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
|
|
+index 17d1e3f770c14..110ff08189867 100644
|
|
+--- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
|
|
++++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h
|
|
+@@ -39,6 +39,12 @@ struct VecDesc {
|
|
+ NotLibFunc
|
|
+ };
|
|
+
|
|
++enum SVMLAccuracy {
|
|
++ SVML_DEFAULT,
|
|
++ SVML_HA,
|
|
++ SVML_EP
|
|
++};
|
|
++
|
|
+ /// Implementation of the target library information.
|
|
+ ///
|
|
+ /// This class constructs tables that hold the target library information and
|
|
+@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl {
|
|
+ /// Return true if the function F has a vector equivalent with vectorization
|
|
+ /// factor VF.
|
|
+ bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
|
|
+- return !getVectorizedFunction(F, VF).empty();
|
|
++ return !getVectorizedFunction(F, VF, false).empty();
|
|
+ }
|
|
+
|
|
+ /// Return true if the function F has a vector equivalent with any
|
|
+@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl {
|
|
+
|
|
+ /// Return the name of the equivalent of F, vectorized with factor VF. If no
|
|
+ /// such mapping exists, return the empty string.
|
|
+- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
|
|
++ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const;
|
|
++
|
|
++ Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
|
|
++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const;
|
|
+
|
|
+ /// Set to true iff i32 parameters to library functions should have signext
|
|
+ /// or zeroext attributes if they correspond to C-level int or unsigned int,
|
|
+@@ -326,8 +335,13 @@ class TargetLibraryInfo {
|
|
+ bool isFunctionVectorizable(StringRef F) const {
|
|
+ return Impl->isFunctionVectorizable(F);
|
|
+ }
|
|
+- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
|
|
+- return Impl->getVectorizedFunction(F, VF);
|
|
++ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const {
|
|
++ return Impl->getVectorizedFunction(F, VF, IsFast);
|
|
++ }
|
|
++
|
|
++ Optional<CallingConv::ID> getVectorizedFunctionCallingConv(
|
|
++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
|
|
++ return Impl->getVectorizedFunctionCallingConv(F, FTy, DL);
|
|
+ }
|
|
+
|
|
+ /// Tests if the function is both available and a candidate for optimized code
|
|
+diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
|
|
+index 78ebb35e0ea4d..3ffb57db8b18b 100644
|
|
+--- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
|
|
++++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h
|
|
+@@ -133,6 +133,9 @@ enum Kind {
|
|
+ kw_fastcc,
|
|
+ kw_coldcc,
|
|
+ kw_intel_ocl_bicc,
|
|
++ kw_intel_svmlcc128,
|
|
++ kw_intel_svmlcc256,
|
|
++ kw_intel_svmlcc512,
|
|
+ kw_cfguard_checkcc,
|
|
+ kw_x86_stdcallcc,
|
|
+ kw_x86_fastcallcc,
|
|
+diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
|
|
+index 0498fc269b634..23bb3de41bc1a 100644
|
|
+--- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
|
|
++++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt
|
|
+@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86)
|
|
+ tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore)
|
|
+ tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve)
|
|
+ add_public_tablegen_target(intrinsics_gen)
|
|
++
|
|
++set(LLVM_TARGET_DEFINITIONS SVML.td)
|
|
++tablegen(LLVM SVML.inc -gen-svml)
|
|
++add_public_tablegen_target(svml_gen)
|
|
+diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
|
|
+index fd28542465225..096eea1a8e19b 100644
|
|
+--- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
|
|
++++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h
|
|
+@@ -252,6 +252,11 @@ namespace CallingConv {
|
|
+ /// M68k_INTR - Calling convention used for M68k interrupt routines.
|
|
+ M68k_INTR = 101,
|
|
+
|
|
++ /// Intel_SVML - Calling conventions for Intel Short Vector Math Library
|
|
++ Intel_SVML128 = 102,
|
|
++ Intel_SVML256 = 103,
|
|
++ Intel_SVML512 = 104,
|
|
++
|
|
+ /// The highest possible calling convention ID. Must be some 2^k - 1.
|
|
+ MaxID = 1023
|
|
+ };
|
|
+diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td
|
|
+new file mode 100644
|
|
+index 0000000000000..5af710404c9d9
|
|
+--- /dev/null
|
|
++++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td
|
|
+@@ -0,0 +1,62 @@
|
|
++//===-- SVML.td - Defines SVML call variants ---------------*- tablegen -*-===//
|
|
++//
|
|
++// The LLVM Compiler Infrastructure
|
|
++//
|
|
++// This file is distributed under the University of Illinois Open Source
|
|
++// License. See LICENSE.TXT for details.
|
|
++//
|
|
++//===----------------------------------------------------------------------===//
|
|
++//
|
|
++// This file is used by TableGen to define the different types of SVML function
|
|
++// variants used with -fveclib=SVML.
|
|
++//
|
|
++//===----------------------------------------------------------------------===//
|
|
++
|
|
++class SvmlVariant;
|
|
++
|
|
++def sin : SvmlVariant;
|
|
++def cos : SvmlVariant;
|
|
++def pow : SvmlVariant;
|
|
++def exp : SvmlVariant;
|
|
++def log : SvmlVariant;
|
|
++def acos : SvmlVariant;
|
|
++def acosh : SvmlVariant;
|
|
++def asin : SvmlVariant;
|
|
++def asinh : SvmlVariant;
|
|
++def atan2 : SvmlVariant;
|
|
++def atan : SvmlVariant;
|
|
++def atanh : SvmlVariant;
|
|
++def cbrt : SvmlVariant;
|
|
++def cdfnorm : SvmlVariant;
|
|
++def cdfnorminv : SvmlVariant;
|
|
++def cosd : SvmlVariant;
|
|
++def cosh : SvmlVariant;
|
|
++def erf : SvmlVariant;
|
|
++def erfc : SvmlVariant;
|
|
++def erfcinv : SvmlVariant;
|
|
++def erfinv : SvmlVariant;
|
|
++def exp10 : SvmlVariant;
|
|
++def exp2 : SvmlVariant;
|
|
++def expm1 : SvmlVariant;
|
|
++def hypot : SvmlVariant;
|
|
++def invsqrt : SvmlVariant;
|
|
++def log10 : SvmlVariant;
|
|
++def log1p : SvmlVariant;
|
|
++def log2 : SvmlVariant;
|
|
++def sind : SvmlVariant;
|
|
++def sinh : SvmlVariant;
|
|
++def sqrt : SvmlVariant;
|
|
++def tan : SvmlVariant;
|
|
++def tanh : SvmlVariant;
|
|
++
|
|
++// TODO: SVML does not currently provide _ha and _ep variants of these functions.
|
|
++// We should call the default variant of these functions in all cases instead.
|
|
++
|
|
++// def nearbyint : SvmlVariant;
|
|
++// def logb : SvmlVariant;
|
|
++// def floor : SvmlVariant;
|
|
++// def fmod : SvmlVariant;
|
|
++// def ceil : SvmlVariant;
|
|
++// def trunc : SvmlVariant;
|
|
++// def rint : SvmlVariant;
|
|
++// def round : SvmlVariant;
|
|
+diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
|
|
+index aec84124129f4..98286e166fbe2 100644
|
|
+--- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
|
|
++++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt
|
|
+@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis
|
|
+ DEPENDS
|
|
+ intrinsics_gen
|
|
+ ${MLDeps}
|
|
++ svml_gen
|
|
+
|
|
+ LINK_LIBS
|
|
+ ${MLLinkDeps}
|
|
+diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
|
|
+index 02923c2c7eb14..83abde28a62a4 100644
|
|
+--- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
|
|
++++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp
|
|
+@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) {
|
|
+ F->getFunctionType());
|
|
+ }
|
|
+
|
|
++static std::string svmlMangle(StringRef FnName, const bool IsFast) {
|
|
++ std::string FullName = FnName.str();
|
|
++ return IsFast ? FullName : FullName + "_ha";
|
|
++}
|
|
++
|
|
+ /// Initialize the set of available library functions based on the specified
|
|
+ /// target triple. This should be carefully written so that a missing target
|
|
+ /// triple gets a sane set of defaults.
|
|
+@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
|
|
+ }
|
|
+ case SVML: {
|
|
+ const VecDesc VecFuncs[] = {
|
|
+- #define TLI_DEFINE_SVML_VECFUNCS
|
|
+- #include "llvm/Analysis/VecFuncs.def"
|
|
++ #define GET_SVML_VARIANTS
|
|
++ #include "llvm/IR/SVML.inc"
|
|
++ #undef GET_SVML_VARIANTS
|
|
+ };
|
|
+ addVectorizableFunctions(VecFuncs);
|
|
+ break;
|
|
+@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
|
|
+ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
|
|
+ }
|
|
+
|
|
+-StringRef
|
|
+-TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
|
|
+- const ElementCount &VF) const {
|
|
++std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
|
|
++ const ElementCount &VF,
|
|
++ bool IsFast) const {
|
|
++ bool FromSVML = ClVectorLibrary == SVML;
|
|
+ F = sanitizeFunctionName(F);
|
|
+ if (F.empty())
|
|
+- return F;
|
|
++ return F.str();
|
|
+ std::vector<VecDesc>::const_iterator I =
|
|
+ llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
|
|
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
|
|
+- if (I->VectorizationFactor == VF)
|
|
+- return I->VectorFnName;
|
|
++ if (I->VectorizationFactor == VF) {
|
|
++ if (FromSVML) {
|
|
++ return svmlMangle(I->VectorFnName, IsFast);
|
|
++ }
|
|
++ return I->VectorFnName.str();
|
|
++ }
|
|
+ ++I;
|
|
+ }
|
|
+- return StringRef();
|
|
++ return std::string();
|
|
++}
|
|
++
|
|
++static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType)
|
|
++{
|
|
++ assert(isa<VectorType>(FType.getReturnType()));
|
|
++ auto *VecCallRetType = cast<VectorType>(FType.getReturnType());
|
|
++ auto TypeBitWidth = DL.getTypeSizeInBits(VecCallRetType);
|
|
++ if (TypeBitWidth == 128) {
|
|
++ return CallingConv::Intel_SVML128;
|
|
++ } else if (TypeBitWidth == 256) {
|
|
++ return CallingConv::Intel_SVML256;
|
|
++ } else if (TypeBitWidth == 512) {
|
|
++ return CallingConv::Intel_SVML512;
|
|
++ } else {
|
|
++ llvm_unreachable("Invalid vector width");
|
|
++ }
|
|
++ return 0; // not reachable
|
|
++}
|
|
++
|
|
++Optional<CallingConv::ID>
|
|
++TargetLibraryInfoImpl::getVectorizedFunctionCallingConv(
|
|
++ StringRef F, const FunctionType &FTy, const DataLayout &DL) const {
|
|
++ if (F.startswith("__svml")) {
|
|
++ return getSVMLCallingConv(DL, FTy);
|
|
++ }
|
|
++ return {};
|
|
+ }
|
|
+
|
|
+ TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
|
|
+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
|
|
+index e3bf41c9721b6..4f9dccd4e0724 100644
|
|
+--- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
|
|
++++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp
|
|
+@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() {
|
|
+ KEYWORD(spir_kernel);
|
|
+ KEYWORD(spir_func);
|
|
+ KEYWORD(intel_ocl_bicc);
|
|
++ KEYWORD(intel_svmlcc128);
|
|
++ KEYWORD(intel_svmlcc256);
|
|
++ KEYWORD(intel_svmlcc512);
|
|
+ KEYWORD(x86_64_sysvcc);
|
|
+ KEYWORD(win64cc);
|
|
+ KEYWORD(x86_regcallcc);
|
|
+diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
|
|
+index 432ec151cf8ae..3bd6ee61024b8 100644
|
|
+--- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
|
|
++++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp
|
|
+@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
|
|
+ /// ::= 'ccc'
|
|
+ /// ::= 'fastcc'
|
|
+ /// ::= 'intel_ocl_bicc'
|
|
++/// ::= 'intel_svmlcc128'
|
|
++/// ::= 'intel_svmlcc256'
|
|
++/// ::= 'intel_svmlcc512'
|
|
+ /// ::= 'coldcc'
|
|
+ /// ::= 'cfguard_checkcc'
|
|
+ /// ::= 'x86_stdcallcc'
|
|
+@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
|
+ case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break;
|
|
+ case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break;
|
|
+ case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
|
|
++ case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break;
|
|
++ case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break;
|
|
++ case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break;
|
|
+ case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break;
|
|
+ case lltok::kw_win64cc: CC = CallingConv::Win64; break;
|
|
+ case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break;
|
|
+diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
|
|
+index 0ff045fa787e8..175651949ef85 100644
|
|
+--- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
|
|
++++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp
|
|
+@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
|
|
+ // and the exact vector width of the call operands in the
|
|
+ // TargetLibraryInfo.
|
|
+ const std::string TLIName =
|
|
+- std::string(TLI.getVectorizedFunction(ScalarName, VF));
|
|
++ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
|
|
+
|
|
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
|
|
+ << ScalarName << "` and vector width " << VF << ".\n");
|
|
+diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
|
|
+index 179754e275b03..c4e95752c97e8 100644
|
|
+--- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
|
|
++++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp
|
|
+@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
|
|
+ case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break;
|
|
+ case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
|
|
+ case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
|
|
++ case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break;
|
|
++ case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break;
|
|
++ case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break;
|
|
+ case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
|
|
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
|
|
+ case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
|
|
+diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp
|
|
+index 989d01e2e3950..bae7382a36e13 100644
|
|
+--- a/llvm-14.0.6.src/lib/IR/Verifier.cpp
|
|
++++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp
|
|
+@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) {
|
|
+ case CallingConv::Fast:
|
|
+ case CallingConv::Cold:
|
|
+ case CallingConv::Intel_OCL_BI:
|
|
++ case CallingConv::Intel_SVML128:
|
|
++ case CallingConv::Intel_SVML256:
|
|
++ case CallingConv::Intel_SVML512:
|
|
+ case CallingConv::PTX_Kernel:
|
|
+ case CallingConv::PTX_Device:
|
|
+ Assert(!F.isVarArg(), "Calling convention does not support varargs or "
|
|
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
|
|
+index 4dd8a6cdd8982..12e65521215e4 100644
|
|
+--- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
|
|
++++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td
|
|
+@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[
|
|
+ CCDelegateTo<RetCC_X86_64_C>
|
|
+ ]>;
|
|
+
|
|
++// Intel_SVML return-value convention.
|
|
++def RetCC_Intel_SVML : CallingConv<[
|
|
++ // Vector types are returned in XMM0,XMM1
|
|
++ CCIfType<[v4f32, v2f64],
|
|
++ CCAssignToReg<[XMM0,XMM1]>>,
|
|
++
|
|
++ // 256-bit FP vectors
|
|
++ CCIfType<[v8f32, v4f64],
|
|
++ CCAssignToReg<[YMM0,YMM1]>>,
|
|
++
|
|
++ // 512-bit FP vectors
|
|
++ CCIfType<[v16f32, v8f64],
|
|
++ CCAssignToReg<[ZMM0,ZMM1]>>
|
|
++]>;
|
|
++
|
|
+ // This is the return-value convention used for the entire X86 backend.
|
|
+ let Entry = 1 in
|
|
+ def RetCC_X86 : CallingConv<[
|
|
+@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[
|
|
+ // Check if this is the Intel OpenCL built-ins calling convention
|
|
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
|
|
+
|
|
++ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<RetCC_Intel_SVML>>,
|
|
++ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<RetCC_Intel_SVML>>,
|
|
++ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<RetCC_Intel_SVML>>,
|
|
++
|
|
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
|
|
+ CCDelegateTo<RetCC_X86_32>
|
|
+ ]>;
|
|
+@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[
|
|
+ CCDelegateTo<CC_X86_32_C>
|
|
+ ]>;
|
|
+
|
|
++// X86-64 Intel Short Vector Math Library calling convention.
|
|
++def CC_Intel_SVML : CallingConv<[
|
|
++
|
|
++ // The SSE vector arguments are passed in XMM registers.
|
|
++ CCIfType<[v4f32, v2f64],
|
|
++ CCAssignToReg<[XMM0, XMM1, XMM2]>>,
|
|
++
|
|
++ // The 256-bit vector arguments are passed in YMM registers.
|
|
++ CCIfType<[v8f32, v4f64],
|
|
++ CCAssignToReg<[YMM0, YMM1, YMM2]>>,
|
|
++
|
|
++ // The 512-bit vector arguments are passed in ZMM registers.
|
|
++ CCIfType<[v16f32, v8f64],
|
|
++ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>
|
|
++]>;
|
|
++
|
|
++def CC_X86_32_Intr : CallingConv<[
|
|
++ CCAssignToStack<4, 4>
|
|
++]>;
|
|
++
|
|
++def CC_X86_64_Intr : CallingConv<[
|
|
++ CCAssignToStack<8, 8>
|
|
++]>;
|
|
++
|
|
+ //===----------------------------------------------------------------------===//
|
|
+ // X86 Root Argument Calling Conventions
|
|
+ //===----------------------------------------------------------------------===//
|
|
+@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[
|
|
+ let Entry = 1 in
|
|
+ def CC_X86 : CallingConv<[
|
|
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
|
|
++ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<CC_Intel_SVML>>,
|
|
++ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<CC_Intel_SVML>>,
|
|
++ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<CC_Intel_SVML>>,
|
|
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
|
|
+ CCDelegateTo<CC_X86_32>
|
|
+ ]>;
|
|
+@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
|
|
+ (sequence "R%u", 12, 15))>;
|
|
+ def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
|
|
+ (sequence "XMM%u", 8, 15))>;
|
|
++
|
|
++// SVML calling convention
|
|
++def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>;
|
|
++def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML,
|
|
++ K4, K5, K6, K7)>;
|
|
++
|
|
++def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>;
|
|
++
|
|
++def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "XMM%u", 8, 15))>;
|
|
++def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "XMM%u", 6, 15))>;
|
|
++
|
|
++def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "YMM%u", 8, 15))>;
|
|
++def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "YMM%u", 6, 15))>;
|
|
++
|
|
++def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "ZMM%u", 16, 31),
|
|
++ K4, K5, K6, K7)>;
|
|
++def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
|
|
++ (sequence "ZMM%u", 6, 21),
|
|
++ K4, K5, K6, K7)>;
|
|
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
|
|
+index 8bb7e81e19bbd..1780ce3fc6467 100644
|
|
+--- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
|
|
++++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp
|
|
+@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
|
|
+ // FIXME: Only some x86_32 calling conventions support AVX512.
|
|
+ if (Subtarget.useAVX512Regs() &&
|
|
+ (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
|
|
+- CallConv == CallingConv::Intel_OCL_BI)))
|
|
++ CallConv == CallingConv::Intel_OCL_BI ||
|
|
++ CallConv == CallingConv::Intel_SVML512)))
|
|
+ VecVT = MVT::v16f32;
|
|
+ else if (Subtarget.hasAVX())
|
|
+ VecVT = MVT::v8f32;
|
|
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
|
|
+index 130cb61cdde24..9eec3b25ca9f2 100644
|
|
+--- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
|
|
++++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp
|
|
+@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
|
|
+ }
|
|
+ }
|
|
+
|
|
++namespace {
|
|
++std::pair<const uint32_t *, const MCPhysReg *> getSVMLRegMaskAndSaveList(
|
|
++ bool Is64Bit, bool IsWin64, CallingConv::ID CC) {
|
|
++ assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512);
|
|
++ unsigned Abi = CC - CallingConv::Intel_SVML128 ; // 0 - 128, 1 - 256, 2 - 512
|
|
++
|
|
++ const std::pair<const uint32_t *, const MCPhysReg *> Abi64[] = {
|
|
++ std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList),
|
|
++ std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList),
|
|
++ std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList),
|
|
++ };
|
|
++
|
|
++ const std::pair<const uint32_t *, const MCPhysReg *> AbiWin64[] = {
|
|
++ std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList),
|
|
++ std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList),
|
|
++ std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList),
|
|
++ };
|
|
++
|
|
++ const std::pair<const uint32_t *, const MCPhysReg *> Abi32[] = {
|
|
++ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
|
|
++ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
|
|
++ std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList),
|
|
++ };
|
|
++
|
|
++ if (Is64Bit) {
|
|
++ if (IsWin64) {
|
|
++ return AbiWin64[Abi];
|
|
++ } else {
|
|
++ return Abi64[Abi];
|
|
++ }
|
|
++ } else {
|
|
++ return Abi32[Abi];
|
|
++ }
|
|
++}
|
|
++}
|
|
++
|
|
+ const MCPhysReg *
|
|
+ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|
+ assert(MF && "MachineFunction required");
|
|
+@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|
+ return CSR_64_Intel_OCL_BI_SaveList;
|
|
+ break;
|
|
+ }
|
|
++ case CallingConv::Intel_SVML128:
|
|
++ case CallingConv::Intel_SVML256:
|
|
++ case CallingConv::Intel_SVML512: {
|
|
++ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second;
|
|
++ }
|
|
+ case CallingConv::HHVM:
|
|
+ return CSR_64_HHVM_SaveList;
|
|
+ case CallingConv::X86_RegCall:
|
|
+@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
|
+ return CSR_64_Intel_OCL_BI_RegMask;
|
|
+ break;
|
|
+ }
|
|
++ case CallingConv::Intel_SVML128:
|
|
++ case CallingConv::Intel_SVML256:
|
|
++ case CallingConv::Intel_SVML512: {
|
|
++ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first;
|
|
++ }
|
|
+ case CallingConv::HHVM:
|
|
+ return CSR_64_HHVM_RegMask;
|
|
+ case CallingConv::X86_RegCall:
|
|
+diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
|
|
+index 5d773f0c57dfb..6bdf5bc6f3fe9 100644
|
|
+--- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
|
|
++++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h
|
|
+@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
|
|
+ case CallingConv::X86_ThisCall:
|
|
+ case CallingConv::X86_VectorCall:
|
|
+ case CallingConv::Intel_OCL_BI:
|
|
++ case CallingConv::Intel_SVML128:
|
|
++ case CallingConv::Intel_SVML256:
|
|
++ case CallingConv::Intel_SVML512:
|
|
+ return isTargetWin64();
|
|
+ // This convention allows using the Win64 convention on other targets.
|
|
+ case CallingConv::Win64:
|
|
+diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
|
|
+index 047bf5569ded3..59897785f156c 100644
|
|
+--- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
|
|
++++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp
|
|
+@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
|
|
+
|
|
+ auto AddVariantDecl = [&](const ElementCount &VF) {
|
|
+ const std::string TLIName =
|
|
+- std::string(TLI.getVectorizedFunction(ScalarName, VF));
|
|
++ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast()));
|
|
+ if (!TLIName.empty()) {
|
|
+ std::string MangledName =
|
|
+ VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
|
|
+diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
+index 46ff0994e04e7..f472af5e1a835 100644
|
|
+--- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
++++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
+@@ -712,6 +712,27 @@ class InnerLoopVectorizer {
|
|
+ virtual void printDebugTracesAtStart(){};
|
|
+ virtual void printDebugTracesAtEnd(){};
|
|
+
|
|
++ /// Check legality of given SVML call instruction \p VecCall generated for
|
|
++ /// scalar call \p Call. If illegal then the appropriate legal instruction
|
|
++ /// is returned.
|
|
++ Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call);
|
|
++
|
|
++ /// Returns the legal VF for a call instruction \p CI using TTI information
|
|
++ /// and vector type.
|
|
++ ElementCount getLegalVFForCall(CallInst *CI);
|
|
++
|
|
++ /// Partially vectorize a given call \p Call by breaking it down into multiple
|
|
++ /// calls of \p LegalCall, decided by the variant VF \p LegalVF.
|
|
++ Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall,
|
|
++ unsigned LegalVF);
|
|
++
|
|
++ /// Generate shufflevector instruction for a vector value \p V based on the
|
|
++ /// current \p Part and a smaller VF \p LegalVF.
|
|
++ Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part);
|
|
++
|
|
++ /// Combine partially vectorized calls stored in \p CallResults.
|
|
++ Value *combinePartialVecCalls(SmallVectorImpl<Value *> &CallResults);
|
|
++
|
|
+ /// The original loop.
|
|
+ Loop *OrigLoop;
|
|
+
|
|
+@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) {
|
|
+ return !CInt || CInt->isZero();
|
|
+ }
|
|
+
|
|
++static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL,
|
|
++ const TargetLibraryInfo &TLI) {
|
|
++ Function *VectorF = CI.getCalledFunction();
|
|
++ FunctionType *FTy = VectorF->getFunctionType();
|
|
++ StringRef VFName = VectorF->getName();
|
|
++ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL);
|
|
++ if (CC) {
|
|
++ CI.setCallingConv(*CC);
|
|
++ }
|
|
++}
|
|
++
|
|
+ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
|
|
+ VPUser &ArgOperands,
|
|
+ VPTransformState &State) {
|
|
+@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
|
|
+ if (isa<FPMathOperator>(V))
|
|
+ V->copyFastMathFlags(CI);
|
|
+
|
|
++ const DataLayout &DL = V->getModule()->getDataLayout();
|
|
++ setVectorFunctionCallingConv(*V, DL, *TLI);
|
|
++
|
|
++ // Perform legalization of SVML call instruction only if original call
|
|
++ // was not Intrinsic
|
|
++ if (!UseVectorIntrinsic &&
|
|
++ (V->getCalledFunction()->getName()).startswith("__svml")) {
|
|
++ // assert((V->getCalledFunction()->getName()).startswith("__svml"));
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump());
|
|
++ auto *LegalV = cast<Instruction>(legalizeSVMLCall(V, CI));
|
|
++ LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: ";
|
|
++ LegalV->dump());
|
|
++ State.set(Def, LegalV, Part);
|
|
++ addMetadata(LegalV, &I);
|
|
++ } else {
|
|
+ State.set(Def, V, Part);
|
|
+ addMetadata(V, &I);
|
|
++ }
|
|
++ }
|
|
++}
|
|
++
|
|
++//===----------------------------------------------------------------------===//
|
|
++// Implementation of functions for SVML vector call legalization.
|
|
++//===----------------------------------------------------------------------===//
|
|
++//
|
|
++// Unlike other VECLIBs, SVML needs to be used with target-legal
|
|
++// vector types. Otherwise, link failures and/or runtime failures
|
|
++// will occur. A motivating example could be -
|
|
++//
|
|
++// double *a;
|
|
++// float *b;
|
|
++// #pragma clang loop vectorize_width(8)
|
|
++// for(i = 0; i < N; ++i) {
|
|
++// a[i] = sin(i); // Legal SVML VF must be 4 or below on AVX
|
|
++// b[i] = cosf(i); // VF can be 8 on AVX since 8 floats can fit in YMM
|
|
++// }
|
|
++//
|
|
++// Current implementation of vector code generation in LV is
|
|
++// driven based on a single VF (in InnerLoopVectorizer::VF). This
|
|
++// inhibits the flexibility of adjusting/choosing different VF
|
|
++// for different instructions.
|
|
++//
|
|
++// Due to this limitation it is much more straightforward to
|
|
++// first generate the illegal sin8 (svml_sin8 for SVML vector
|
|
++// library) call and then legalize it than trying to avoid
|
|
++// generating illegal code from the beginning.
|
|
++//
|
|
++// A solution for this problem is to check legality of the
|
|
++// call instruction right after generating it in vectorizer and
|
|
++// if it is illegal we split the call arguments and issue multiple
|
|
++// calls to match the legal VF. This is demonstrated currently for
|
|
++// the SVML vector library calls (non-intrinsic version only).
|
|
++//
|
|
++// Future directions and extensions:
|
|
++// 1) This legalization example shows us that a good direction
|
|
++// for the VPlan framework would be to model the vector call
|
|
++// instructions in a way that legal VF for each call is chosen
|
|
++// correctly within vectorizer and illegal code generation is
|
|
++// avoided.
|
|
++// 2) This logic can also be extended to general vector functions
|
|
++// i.e. legalization of OpenMP declare simd functions. The
|
|
++// requirements needed for this will be documented soon.
|
|
++
|
|
++Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall,
|
|
++ CallInst *Call) {
|
|
++ ElementCount LegalVF = getLegalVFForCall(VecCall);
|
|
++
|
|
++ assert(LegalVF.getKnownMinValue() > 1 &&
|
|
++ "Legal VF for SVML call must be greater than 1 to vectorize");
|
|
++
|
|
++ if (LegalVF == VF)
|
|
++ return VecCall;
|
|
++ else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue())
|
|
++ // TODO: handle case when we are underfilling vectors
|
|
++ return VecCall;
|
|
++
|
|
++ // Legal VF for this SVML call is smaller than chosen VF, break it down into
|
|
++ // smaller call instructions
|
|
++
|
|
++ // Convert args, types and return type to match legal VF
|
|
++ SmallVector<Type *, 4> NewTys;
|
|
++ SmallVector<Value *, 4> NewArgs;
|
|
++
|
|
++ for (Value *ArgOperand : Call->args()) {
|
|
++ Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF);
|
|
++ NewTys.push_back(Ty);
|
|
++ NewArgs.push_back(UndefValue::get(Ty));
|
|
+ }
|
|
++
|
|
++ // Construct legal vector function
|
|
++ const VFShape Shape =
|
|
++ VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/);
|
|
++ Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape);
|
|
++ assert(LegalVectorF != nullptr && "Can't create legal vector function.");
|
|
++
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump());
|
|
++
|
|
++ SmallVector<OperandBundleDef, 1> OpBundles;
|
|
++ Call->getOperandBundlesAsDefs(OpBundles);
|
|
++ auto LegalV = std::unique_ptr<CallInst>(CallInst::Create(LegalVectorF, NewArgs, OpBundles));
|
|
++
|
|
++ if (isa<FPMathOperator>(LegalV))
|
|
++ LegalV->copyFastMathFlags(Call);
|
|
++
|
|
++ const DataLayout &DL = VecCall->getModule()->getDataLayout();
|
|
++ // Set SVML calling conventions
|
|
++ setVectorFunctionCallingConv(*LegalV, DL, *TLI);
|
|
++
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump());
|
|
++
|
|
++ Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue());
|
|
++
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump());
|
|
++
|
|
++ // Remove the illegal call from Builder
|
|
++ VecCall->eraseFromParent();
|
|
++
|
|
++ return LegalizedCall;
|
|
++}
|
|
++
|
|
++ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) {
|
|
++ const DataLayout &DL = CI->getModule()->getDataLayout();
|
|
++ FunctionType *CallFT = CI->getFunctionType();
|
|
++ // All functions that need legalization should have a vector return type.
|
|
++ // This is true for all SVML functions that are currently supported.
|
|
++ assert(isa<VectorType>(CallFT->getReturnType()) &&
|
|
++ "Return type of call that needs legalization is not a vector.");
|
|
++ auto *VecCallRetType = cast<VectorType>(CallFT->getReturnType());
|
|
++ Type *ElemType = VecCallRetType->getElementType();
|
|
++ // Legal VF = fixed-width vector register bits / bits per returned element.
|
|
++ unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType);
|
|
++ unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
|
|
++ unsigned LegalVF = VectorBitWidth / TypeBitWidth;
|
|
++
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n");
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n");
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth
|
|
++ << "\n");
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n");
|
|
++
|
|
++ return ElementCount::getFixed(LegalVF);
|
|
++}
|
|
++
|
|
++// Partial vectorization of a call instruction is achieved by making clones of
|
|
++// \p LegalCall and overwriting its argument operands with shufflevector
|
|
++// equivalent decided based on \p LegalVF and current Part being filled.
|
|
++Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call,
|
|
++ CallInst *LegalCall,
|
|
++ unsigned LegalVF) {
|
|
++ unsigned NumParts = VF.getKnownMinValue() / LegalVF;
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n");
|
|
++ SmallVector<Value *, 8> CallResults;
|
|
++
|
|
++ for (unsigned Part = 0; Part < NumParts; ++Part) {
|
|
++ auto *ClonedCall = cast<CallInst>(LegalCall->clone());
|
|
++
|
|
++ // Update the arg operand of cloned call to shufflevector
|
|
++ for (unsigned i = 0, ie = Call->arg_size(); i != ie; ++i) {
|
|
++ auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part);
|
|
++ ClonedCall->setArgOperand(i, NewOp);
|
|
++ }
|
|
++
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump());
|
|
++
|
|
++ auto *PartialVecCall = Builder.Insert(ClonedCall);
|
|
++ CallResults.push_back(PartialVecCall);
|
|
++ }
|
|
++
|
|
++ return combinePartialVecCalls(CallResults);
|
|
++}
|
|
++
|
|
++Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF,
|
|
++ unsigned Part) {
|
|
++ // Example:
|
|
++ // Consider the following vector code -
|
|
++ // %1 = sitofp <4 x i32> %0 to <4 x double>
|
|
++ // %2 = call <4 x double> @__svml_sin4(<4 x double> %1)
|
|
++ //
|
|
++ // If the LegalVF is 2, we partially vectorize the sin4 call by invoking
|
|
++ // generateShuffleValue on the operand %1
|
|
++ // If Part = 0, output value is -
|
|
++ // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32><i32 0, i32 1>
|
|
++ // and if Part = 1, output is -
|
|
++ // %shuffle7 = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32><i32 2, i32 3>
|
|
++
|
|
++ assert(isa<VectorType>(V->getType()) &&
|
|
++ "Cannot generate shuffles for non-vector values.");
|
|
++ SmallVector<int, 4> ShuffleMask;
|
|
++ Value *Undef = UndefValue::get(V->getType());
|
|
++
|
|
++ unsigned ElemIdx = Part * LegalVF;
|
|
++
|
|
++ for (unsigned K = 0; K < LegalVF; K++)
|
|
++ ShuffleMask.push_back(static_cast<int>(ElemIdx + K));
|
|
++
|
|
++ auto *ShuffleInst =
|
|
++ Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle");
|
|
++
|
|
++ return ShuffleInst;
|
|
++}
|
|
++
|
|
++// Results of the calls executed by smaller legal call instructions must be
|
|
++// combined to match the original VF for later use. This is done by constructing
|
|
++// shufflevector instructions in a cumulative fashion.
|
|
++Value *InnerLoopVectorizer::combinePartialVecCalls(
|
|
++ SmallVectorImpl<Value *> &CallResults) {
|
|
++ assert(isa<VectorType>(CallResults[0]->getType()) &&
|
|
++ "Cannot combine calls with non-vector results.");
|
|
++ auto *CallType = cast<VectorType>(CallResults[0]->getType());
|
|
++
|
|
++ Value *CombinedShuffle = nullptr;
|
|
++ unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2;
|
|
++ unsigned NumRegs = CallResults.size();
|
|
++
|
|
++ assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) &&
|
|
++ "Number of partial vector calls to combine must be a power of 2 "
|
|
++ "(atleast 2^1)");
|
|
++ // Concatenate neighbouring results pairwise; every level doubles the width.
|
|
++ while (NumRegs > 1) {
|
|
++ // Identity mask over both operands; it is the same for every pair here.
|
|
++ SmallVector<int, 4> ShuffleMask;
|
|
++ for (unsigned J = 0; J < NumElems; J++)
|
|
++ ShuffleMask.push_back(static_cast<int>(J));
|
|
++ for (unsigned I = 0; I < NumRegs; I += 2) {
|
|
++ CombinedShuffle = Builder.CreateShuffleVector(
|
|
++ CallResults[I], CallResults[I + 1], ShuffleMask, "combined");
|
|
++ LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:";
|
|
++ CombinedShuffle->dump());
|
|
++ CallResults.push_back(CombinedShuffle);
|
|
++ }
|
|
++
|
|
++ SmallVector<Value *, 2>::iterator Start = CallResults.begin();
|
|
++ SmallVector<Value *, 2>::iterator End = Start + NumRegs;
|
|
++ CallResults.erase(Start, End);
|
|
++
|
|
++ NumElems *= 2;
|
|
++ NumRegs /= 2;
|
|
++ }
|
|
++
|
|
++ return CombinedShuffle;
|
|
+ }
|
|
+
|
|
+ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
|
|
+diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
|
|
+index df8b7c498bd00..63a36549f18fd 100644
|
|
+--- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
|
|
++++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
|
|
+@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
|
|
+ define <4 x double> @exp_v4(<4 x double> %in) {
|
|
+ ; SVML-LABEL: define {{[^@]+}}@exp_v4
|
|
+ ; SVML-SAME: (<4 x double> [[IN:%.*]]) {
|
|
+-; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
|
|
++; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]])
|
|
+ ; SVML-NEXT: ret <4 x double> [[TMP1]]
|
|
+ ;
|
|
+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
|
|
+@@ -37,7 +37,7 @@ declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
|
|
+ define <4 x float> @exp_f32(<4 x float> %in) {
|
|
+ ; SVML-LABEL: define {{[^@]+}}@exp_f32
|
|
+ ; SVML-SAME: (<4 x float> [[IN:%.*]]) {
|
|
+-; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
|
|
++; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]])
|
|
+ ; SVML-NEXT: ret <4 x float> [[TMP1]]
|
|
+ ;
|
|
+ ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
|
|
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
|
|
+index a6e191c3d6923..d6e2e11106949 100644
|
|
+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
|
|
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
|
|
+@@ -39,7 +39,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__exp_finite(double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @exp_f64
|
|
+-; CHECK: <4 x double> @__svml_exp4
|
|
++; CHECK: <2 x double> @__svml_exp2
|
|
++; CHECK: <2 x double> @__svml_exp2
|
|
+ ; CHECK: ret
|
|
+ define void @exp_f64(double* nocapture %varray) {
|
|
+ entry:
|
|
+@@ -99,7 +100,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__log_finite(double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @log_f64
|
|
+-; CHECK: <4 x double> @__svml_log4
|
|
++; CHECK: <2 x double> @__svml_log2
|
|
++; CHECK: <2 x double> @__svml_log2
|
|
+ ; CHECK: ret
|
|
+ define void @log_f64(double* nocapture %varray) {
|
|
+ entry:
|
|
+@@ -159,7 +161,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__pow_finite(double, double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @pow_f64
|
|
+-; CHECK: <4 x double> @__svml_pow4
|
|
++; CHECK: <2 x double> @__svml_pow2
|
|
++; CHECK: <2 x double> @__svml_pow2
|
|
+ ; CHECK: ret
|
|
+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
|
|
+ entry:
|
|
+@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0
|
|
+
|
|
+ define void @exp2f_finite(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2f_finite(
|
|
+-; CHECK: call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}})
|
|
++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}})
|
|
++; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}})
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0
|
|
+
|
|
+ define void @exp2_finite(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2_finite(
|
|
+-; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}})
|
|
++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}})
|
|
++; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}})
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -276,7 +281,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__log2_finite(double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @log2_f64
|
|
+-; CHECK: <4 x double> @__svml_log24
|
|
++; CHECK: <2 x double> @__svml_log22
|
|
++; CHECK: <2 x double> @__svml_log22
|
|
+ ; CHECK: ret
|
|
+ define void @log2_f64(double* nocapture %varray) {
|
|
+ entry:
|
|
+@@ -333,7 +339,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__log10_finite(double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @log10_f64
|
|
+-; CHECK: <4 x double> @__svml_log104
|
|
++; CHECK: <2 x double> @__svml_log102
|
|
++; CHECK: <2 x double> @__svml_log102
|
|
+ ; CHECK: ret
|
|
+ define void @log10_f64(double* nocapture %varray) {
|
|
+ entry:
|
|
+@@ -390,7 +397,8 @@ for.end: ; preds = %for.body
|
|
+ declare double @__sqrt_finite(double) #0
|
|
+
|
|
+ ; CHECK-LABEL: @sqrt_f64
|
|
+-; CHECK: <4 x double> @__svml_sqrt4
|
|
++; CHECK: <2 x double> @__svml_sqrt2
|
|
++; CHECK: <2 x double> @__svml_sqrt2
|
|
+ ; CHECK: ret
|
|
+ define void @sqrt_f64(double* nocapture %varray) {
|
|
+ entry:
|
|
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
|
|
+index 42c280df6ad02..088bbdcf1aa4a 100644
|
|
+--- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
|
|
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll
|
|
+@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0
|
|
+
|
|
+ define void @sin_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sin_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -71,7 +71,7 @@ for.end:
|
|
+
|
|
+ define void @sin_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sin_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -94,7 +94,7 @@ for.end:
|
|
+
|
|
+ define void @sin_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sin_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -117,7 +117,7 @@ for.end:
|
|
+
|
|
+ define void @sin_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sin_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -140,7 +140,7 @@ for.end:
|
|
+
|
|
+ define void @cos_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @cos_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -163,7 +163,7 @@ for.end:
|
|
+
|
|
+ define void @cos_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @cos_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -186,7 +186,7 @@ for.end:
|
|
+
|
|
+ define void @cos_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @cos_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -209,7 +209,7 @@ for.end:
|
|
+
|
|
+ define void @cos_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @cos_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -232,7 +232,7 @@ for.end:
|
|
+
|
|
+ define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
|
|
+ ; CHECK-LABEL: @pow_f64(
|
|
+-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
|
|
++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -257,7 +257,7 @@ for.end:
|
|
+
|
|
+ define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
|
|
+ ; CHECK-LABEL: @pow_f64_intrinsic(
|
|
+-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
|
|
++; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -282,7 +282,7 @@ for.end:
|
|
+
|
|
+ define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
|
|
+ ; CHECK-LABEL: @pow_f32(
|
|
+-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
|
|
++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -307,7 +307,7 @@ for.end:
|
|
+
|
|
+ define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
|
|
+ ; CHECK-LABEL: @pow_f32_intrinsic(
|
|
+-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
|
|
++; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -332,7 +332,7 @@ for.end:
|
|
+
|
|
+ define void @exp_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -355,7 +355,7 @@ for.end:
|
|
+
|
|
+ define void @exp_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -378,7 +378,7 @@ for.end:
|
|
+
|
|
+ define void @exp_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -401,7 +401,7 @@ for.end:
|
|
+
|
|
+ define void @exp_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -424,7 +424,7 @@ for.end:
|
|
+
|
|
+ define void @log_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -447,7 +447,7 @@ for.end:
|
|
+
|
|
+ define void @log_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -470,7 +470,7 @@ for.end:
|
|
+
|
|
+ define void @log_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -493,7 +493,7 @@ for.end:
|
|
+
|
|
+ define void @log_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -516,7 +516,7 @@ for.end:
|
|
+
|
|
+ define void @log2_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log2_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -539,7 +539,7 @@ for.end:
|
|
+
|
|
+ define void @log2_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log2_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -562,7 +562,7 @@ for.end:
|
|
+
|
|
+ define void @log2_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log2_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -585,7 +585,7 @@ for.end:
|
|
+
|
|
+ define void @log2_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log2_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -608,7 +608,7 @@ for.end:
|
|
+
|
|
+ define void @log10_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log10_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -631,7 +631,7 @@ for.end:
|
|
+
|
|
+ define void @log10_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log10_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -654,7 +654,7 @@ for.end:
|
|
+
|
|
+ define void @log10_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log10_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -677,7 +677,7 @@ for.end:
|
|
+
|
|
+ define void @log10_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @log10_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -700,7 +700,7 @@ for.end:
|
|
+
|
|
+ define void @sqrt_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sqrt_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -723,7 +723,7 @@ for.end:
|
|
+
|
|
+ define void @sqrt_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @sqrt_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -746,7 +746,7 @@ for.end:
|
|
+
|
|
+ define void @exp2_f64(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2_f64(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -769,7 +769,7 @@ for.end:
|
|
+
|
|
+ define void @exp2_f32(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2_f32(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -792,7 +792,7 @@ for.end:
|
|
+
|
|
+ define void @exp2_f64_intrinsic(double* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2_f64_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -815,7 +815,7 @@ for.end:
|
|
+
|
|
+ define void @exp2_f32_intrinsic(float* nocapture %varray) {
|
|
+ ; CHECK-LABEL: @exp2_f32_intrinsic(
|
|
+-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
|
|
++; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]])
|
|
+ ; CHECK: ret void
|
|
+ ;
|
|
+ entry:
|
|
+@@ -836,4 +836,44 @@ for.end:
|
|
+ ret void
|
|
+ }
|
|
+
|
|
++; CHECK-LABEL: @atan2_finite
|
|
++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24(
|
|
++; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24(
|
|
++; CHECK: ret
|
|
++
|
|
++declare double @__atan2_finite(double, double) local_unnamed_addr #0
|
|
++
|
|
++define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 {
|
|
++entry:
|
|
++ br label %for.cond1.preheader
|
|
++
|
|
++for.cond1.preheader: ; preds = %for.inc7, %entry
|
|
++ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ]
|
|
++ %0 = trunc i64 %indvars.iv19 to i32
|
|
++ %conv = sitofp i32 %0 to double
|
|
++ br label %for.body3
|
|
++
|
|
++for.body3: ; preds = %for.body3, %for.cond1.preheader
|
|
++ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
|
++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
++ %1 = trunc i64 %indvars.iv.next to i32
|
|
++ %conv4 = sitofp i32 %1 to double
|
|
++ %call = tail call fast double @__atan2_finite(double %conv, double %conv4)
|
|
++ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv
|
|
++ store double %call, double* %arrayidx6, align 8
|
|
++ %exitcond = icmp eq i64 %indvars.iv.next, 100
|
|
++ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5
|
|
++
|
|
++for.inc7: ; preds = %for.body3
|
|
++ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
|
|
++ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100
|
|
++ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader
|
|
++
|
|
++for.end9: ; preds = %for.inc7
|
|
++ ret void
|
|
++}
|
|
++
|
|
+ attributes #0 = { nounwind readnone }
|
|
++!5 = distinct !{!5, !6, !7}
|
|
++!6 = !{!"llvm.loop.vectorize.width", i32 8}
|
|
++!7 = !{!"llvm.loop.vectorize.enable", i1 true}
|
|
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
|
|
+new file mode 100644
|
|
+index 0000000000000..326c763994343
|
|
+--- /dev/null
|
|
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll
|
|
+@@ -0,0 +1,513 @@
|
|
++; Check legalization of SVML calls, including intrinsic versions (like @llvm.<fn_name>.<type>).
|
|
++
|
|
++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
|
|
++
|
|
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
++target triple = "x86_64-unknown-linux-gnu"
|
|
++
|
|
++declare double @sin(double) #0
|
|
++declare float @sinf(float) #0
|
|
++declare double @llvm.sin.f64(double) #0
|
|
++declare float @llvm.sin.f32(float) #0
|
|
++
|
|
++declare double @cos(double) #0
|
|
++declare float @cosf(float) #0
|
|
++declare double @llvm.cos.f64(double) #0
|
|
++declare float @llvm.cos.f32(float) #0
|
|
++
|
|
++declare double @pow(double, double) #0
|
|
++declare float @powf(float, float) #0
|
|
++declare double @llvm.pow.f64(double, double) #0
|
|
++declare float @llvm.pow.f32(float, float) #0
|
|
++
|
|
++declare double @exp(double) #0
|
|
++declare float @expf(float) #0
|
|
++declare double @llvm.exp.f64(double) #0
|
|
++declare float @llvm.exp.f32(float) #0
|
|
++
|
|
++declare double @log(double) #0
|
|
++declare float @logf(float) #0
|
|
++declare double @llvm.log.f64(double) #0
|
|
++declare float @llvm.log.f32(float) #0
|
|
++
|
|
++
|
|
++define void @sin_f64(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @sin_f64(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @sin(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @sin_f32(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @sin_f32(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @sinf(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @sin_f64_intrinsic(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @sin_f64_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @llvm.sin.f64(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @sin_f32_intrinsic(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @sin_f32_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @llvm.sin.f32(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @cos_f64(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @cos_f64(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @cos(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @cos_f32(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @cos_f32(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @cosf(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @cos_f64_intrinsic(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @cos_f64_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @llvm.cos.f64(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @cos_f32_intrinsic(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @cos_f32_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @llvm.cos.f32(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
|
|
++; CHECK-LABEL: @pow_f64(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]])
|
|
++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
|
|
++ %tmp1 = load double, double* %arrayidx, align 4
|
|
++ %tmp2 = tail call double @pow(double %conv, double %tmp1)
|
|
++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %tmp2, double* %arrayidx2, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
|
|
++; CHECK-LABEL: @pow_f64_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]])
|
|
++; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
|
|
++ %tmp1 = load double, double* %arrayidx, align 4
|
|
++ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
|
|
++ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %tmp2, double* %arrayidx2, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
|
|
++; CHECK-LABEL: @pow_f32(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
|
|
++ %tmp1 = load float, float* %arrayidx, align 4
|
|
++ %tmp2 = tail call float @powf(float %conv, float %tmp1)
|
|
++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %tmp2, float* %arrayidx2, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
|
|
++; CHECK-LABEL: @pow_f32_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
|
|
++ %tmp1 = load float, float* %arrayidx, align 4
|
|
++ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
|
|
++ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %tmp2, float* %arrayidx2, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @exp_f64(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @exp_f64(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @exp(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @exp_f32(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @exp_f32(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @expf(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @exp_f64_intrinsic(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @exp_f64_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @llvm.exp.f64(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @exp_f32_intrinsic(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @exp_f32_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @llvm.exp.f32(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @log_f64(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @log_f64(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @log(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @log_f32(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @log_f32(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @logf(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @log_f64_intrinsic(double* nocapture %varray) {
|
|
++; CHECK-LABEL: @log_f64_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]])
|
|
++; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to double
|
|
++ %call = tail call double @llvm.log.f64(double %conv)
|
|
++ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
|
++ store double %call, double* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++define void @log_f32_intrinsic(float* nocapture %varray) {
|
|
++; CHECK-LABEL: @log_f32_intrinsic(
|
|
++; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]])
|
|
++; CHECK: ret void
|
|
++;
|
|
++entry:
|
|
++ br label %for.body
|
|
++
|
|
++for.body:
|
|
++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
++ %tmp = trunc i64 %iv to i32
|
|
++ %conv = sitofp i32 %tmp to float
|
|
++ %call = tail call float @llvm.log.f32(float %conv)
|
|
++ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
|
++ store float %call, float* %arrayidx, align 4
|
|
++ %iv.next = add nuw nsw i64 %iv, 1
|
|
++ %exitcond = icmp eq i64 %iv.next, 1000
|
|
++ br i1 %exitcond, label %for.end, label %for.body
|
|
++
|
|
++for.end:
|
|
++ ret void
|
|
++}
|
|
++
|
|
++attributes #0 = { nounwind readnone }
|
|
++
|
|
+diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
|
|
+new file mode 100644
|
|
+index 0000000000000..9422653445dc2
|
|
+--- /dev/null
|
|
++++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
|
|
+@@ -0,0 +1,61 @@
|
|
++; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype.
|
|
++; The C code used to generate this test:
|
|
++
|
|
++; #include <math.h>
|
|
++;
|
|
++; void foo(double *a, int N){
|
|
++; int i;
|
|
++; #pragma clang loop vectorize_width(8)
|
|
++; for (i=0;i<N;i++){
|
|
++; a[i] = sin(i);
|
|
++; }
|
|
++; }
|
|
++
|
|
++; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -mattr=avx -S < %s | FileCheck %s
|
|
++
|
|
++; CHECK: [[I1:%.*]] = sitofp <8 x i32> [[I0:%.*]] to <8 x double>
|
|
++; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
++; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]])
|
|
++; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
|
++; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]])
|
|
++; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
++; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8
|
|
++
|
|
++
|
|
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
++target triple = "x86_64-unknown-linux-gnu"
|
|
++
|
|
++; Function Attrs: nounwind uwtable
|
|
++define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 {
|
|
++entry:
|
|
++ %cmp5 = icmp sgt i32 %N, 0
|
|
++ br i1 %cmp5, label %for.body.preheader, label %for.end
|
|
++
|
|
++for.body.preheader: ; preds = %entry
|
|
++ %wide.trip.count = zext i32 %N to i64
|
|
++ br label %for.body
|
|
++
|
|
++for.body: ; preds = %for.body, %for.body.preheader
|
|
++ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
++ %0 = trunc i64 %indvars.iv to i32
|
|
++ %conv = sitofp i32 %0 to double
|
|
++ %call = tail call fast double @sin(double %conv) #2
|
|
++ %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv
|
|
++ store double %call, double* %arrayidx, align 8, !tbaa !2
|
|
++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
++ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
++ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
|
|
++
|
|
++for.end: ; preds = %for.body, %entry
|
|
++ ret void
|
|
++}
|
|
++
|
|
++; Function Attrs: nounwind
|
|
++declare dso_local double @sin(double) local_unnamed_addr #1
|
|
++
|
|
++!2 = !{!3, !3, i64 0}
|
|
++!3 = !{!"double", !4, i64 0}
|
|
++!4 = !{!"omnipotent char", !5, i64 0}
|
|
++!5 = !{!"Simple C/C++ TBAA"}
|
|
++!6 = distinct !{!6, !7}
|
|
++!7 = !{!"llvm.loop.vectorize.width", i32 8}
|
|
+diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
|
|
+index e8c83c4d9bd1f..615fdc29176a2 100644
|
|
+--- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
|
|
++++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll
|
|
+@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+ ; COMMON-LABEL: @llvm.compiler.used = appending global
|
|
+ ; SVML-SAME: [6 x i8*] [
|
|
+-; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*),
|
|
+-; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*),
|
|
+-; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*),
|
|
+-; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*),
|
|
+-; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
|
|
+-; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
|
|
++; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*),
|
|
++; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*),
|
|
++; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8_ha to i8*),
|
|
++; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*),
|
|
++; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*),
|
|
++; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*)
|
|
+ ; MASSV-SAME: [2 x i8*] [
|
|
+ ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*),
|
|
+ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*)
|
|
+@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0
|
|
+ attributes #0 = { nounwind readnone }
|
|
+
|
|
+ ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"=
|
|
+-; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2),
|
|
+-; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4),
|
|
+-; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" }
|
|
++; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2_ha),
|
|
++; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4_ha),
|
|
++; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" }
|
|
+
|
|
+ ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"=
|
|
+ ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" }
|
|
+diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
|
|
+index 97df6a55d1b59..199e0285c9e5d 100644
|
|
+--- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
|
|
++++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt
|
|
+@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM
|
|
+ SearchableTableEmitter.cpp
|
|
+ SubtargetEmitter.cpp
|
|
+ SubtargetFeatureInfo.cpp
|
|
++ SVMLEmitter.cpp
|
|
+ TableGen.cpp
|
|
+ Types.cpp
|
|
+ X86DisassemblerTables.cpp
|
|
+diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
|
|
+new file mode 100644
|
|
+index 0000000000000..a5aeea48db28b
|
|
+--- /dev/null
|
|
++++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp
|
|
+@@ -0,0 +1,110 @@
|
|
++//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===//
|
|
++//
|
|
++// The LLVM Compiler Infrastructure
|
|
++//
|
|
++// This file is distributed under the University of Illinois Open Source
|
|
++// License. See LICENSE.TXT for details.
|
|
++//
|
|
++//===----------------------------------------------------------------------===//
|
|
++//
|
|
++// This tablegen backend emits the scalar to svml function map for TLI.
|
|
++//
|
|
++//===----------------------------------------------------------------------===//
|
|
++
|
|
++#include "CodeGenTarget.h"
|
|
++#include "llvm/Support/Format.h"
|
|
++#include "llvm/TableGen/Error.h"
|
|
++#include "llvm/TableGen/Record.h"
|
|
++#include "llvm/TableGen/TableGenBackend.h"
|
|
++#include <map>
|
|
++#include <vector>
|
|
++
|
|
++using namespace llvm;
|
|
++
|
|
++#define DEBUG_TYPE "SVMLVariants"
|
|
++#include "llvm/Support/Debug.h"
|
|
++
|
|
++namespace {
|
|
++
|
|
++class SVMLVariantsEmitter {
|
|
++
|
|
++ RecordKeeper &Records;
|
|
++
|
|
++private:
|
|
++ void emitSVMLVariants(raw_ostream &OS);
|
|
++
|
|
++public:
|
|
++ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {}
|
|
++
|
|
++ void run(raw_ostream &OS);
|
|
++};
|
|
++} // End anonymous namespace
|
|
++
|
|
++/// \brief Emit the set of SVML variant function names.
|
|
++/// The default is to emit the high accuracy SVML variants until a mechanism is
|
|
++/// introduced to allow a selection of different variants through precision
|
|
++/// requirements specified by the user. This code generates mappings to SVML
|
|
++/// from three scalar forms: LLVM intrinsics, math library calls, and the
|
|
++/// finite variants of math library calls.
|
|
++void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) {
|
|
++
|
|
++ const unsigned MinSinglePrecVL = 4;
|
|
++ const unsigned MaxSinglePrecVL = 16;
|
|
++ const unsigned MinDoublePrecVL = 2;
|
|
++ const unsigned MaxDoublePrecVL = 8;
|
|
++
|
|
++ OS << "#ifdef GET_SVML_VARIANTS\n";
|
|
++
|
|
++ for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) {
|
|
++ StringRef SvmlVariantNameStr = D->getName();
|
|
++ // Single Precision SVML
|
|
++ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) {
|
|
++ // Emit the scalar math library function to svml function entry.
|
|
++ OS << "{\"" << SvmlVariantNameStr << "f" << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
|
|
++ << "ElementCount::getFixed(" << VL << ")},\n";
|
|
++
|
|
++ // Emit the scalar intrinsic to svml function entry.
|
|
++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
|
|
++ << "ElementCount::getFixed(" << VL << ")},\n";
|
|
++
|
|
++ // Emit the finite math library function to svml function entry.
|
|
++ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
|
|
++ << "ElementCount::getFixed(" << VL << ")},\n";
|
|
++ }
|
|
++
|
|
++ // Double Precision SVML
|
|
++ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) {
|
|
++ // Emit the scalar math library function to svml function entry.
|
|
++ OS << "{\"" << SvmlVariantNameStr << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
|
|
++ << ")},\n";
|
|
++
|
|
++ // Emit the scalar intrinsic to svml function entry.
|
|
++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL
|
|
++ << ")},\n";
|
|
++
|
|
++ // Emit the finite math library function to svml function entry.
|
|
++ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", ";
|
|
++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", "
|
|
++ << "ElementCount::getFixed(" << VL << ")},\n";
|
|
++ }
|
|
++ }
|
|
++
|
|
++ OS << "#endif // GET_SVML_VARIANTS\n\n";
|
|
++}
|
|
++
|
|
++void SVMLVariantsEmitter::run(raw_ostream &OS) {
|
|
++ emitSVMLVariants(OS);
|
|
++}
|
|
++
|
|
++namespace llvm {
|
|
++
|
|
++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) {
|
|
++ SVMLVariantsEmitter(RK).run(OS);
|
|
++}
|
|
++
|
|
++} // End llvm namespace
|
|
+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
|
|
+index 2d4a45f889be6..603d0c223b33a 100644
|
|
+--- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
|
|
++++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp
|
|
+@@ -57,6 +57,7 @@ enum ActionType {
|
|
+ GenAutomata,
|
|
+ GenDirectivesEnumDecl,
|
|
+ GenDirectivesEnumImpl,
|
|
++ GenSVMLVariants,
|
|
+ };
|
|
+
|
|
+ namespace llvm {
|
|
+@@ -138,7 +139,9 @@ cl::opt<ActionType> Action(
|
|
+ clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl",
|
|
+ "Generate directive related declaration code (header file)"),
|
|
+ clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl",
|
|
+- "Generate directive related implementation code")));
|
|
++ "Generate directive related implementation code"),
|
|
++ clEnumValN(GenSVMLVariants, "gen-svml",
|
|
++ "Generate SVML variant function names")));
|
|
+
|
|
+ cl::OptionCategory PrintEnumsCat("Options for -print-enums");
|
|
+ cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
|
|
+@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
|
|
+ case GenDirectivesEnumImpl:
|
|
+ EmitDirectivesImpl(Records, OS);
|
|
+ break;
|
|
++ case GenSVMLVariants:
|
|
++ EmitSVMLVariants(Records, OS);
|
|
++ break;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
|
|
+index 71db8dc77b052..86c3a3068c2dc 100644
|
|
+--- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
|
|
++++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h
|
|
+@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
|
|
+ void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
|
|
+ void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS);
|
|
+ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
|
|
++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS);
|
|
+
|
|
+ } // End llvm namespace
|
|
+
|
|
+diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
|
|
+index 205db16b7d8cd..2572ab5a59e1b 100644
|
|
+--- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
|
|
++++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim
|
|
+@@ -104,6 +104,7 @@ syn keyword llvmKeyword
|
|
+ \ inreg
|
|
+ \ intel_ocl_bicc
|
|
+ \ inteldialect
|
|
++ \ intel_svmlcc
|
|
+ \ internal
|
|
+ \ jumptable
|
|
+ \ linkonce
|
|
diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat
|
|
index 1ce228c80..e48800dc5 100644
|
|
--- a/conda-recipes/llvmdev/bld.bat
|
|
+++ b/conda-recipes/llvmdev/bld.bat
|
|
@@ -1,3 +1,13 @@
|
|
+setlocal EnableDelayedExpansion
|
|
+FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d
|
|
+if !errorlevel! neq 0 exit /b !errorlevel!)
|
|
+FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d
|
|
+if !errorlevel! neq 0 exit /b !errorlevel!)
|
|
+FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d
|
|
+if !errorlevel! neq 0 exit /b !errorlevel!)
|
|
+
|
|
+DIR
|
|
+
|
|
mkdir build
|
|
cd build
|
|
|
|
@@ -24,31 +34,18 @@ REM the 64bit linker anyway. This must be passed in to certain generators as
|
|
REM '-Thost x64'.
|
|
set PreferredToolArchitecture=x64
|
|
|
|
-set MAX_INDEX_CMAKE_GENERATOR=2
|
|
-
|
|
-REM On older generators we can squeete the architecture into the generator
|
|
-REM name. In newer generators, we must use the -A flag for cmake to hand in the
|
|
-REM correct architecture. Also, using Visual Studio 16 2019 we use toolset
|
|
-REM v141, which basically means use a Visual Studio 15 2017 type compiler from
|
|
-REM Visual Studio 16 2019. See also:
|
|
-REM https://stackoverflow.com/questions/55708600/whats-the-cmake-generator-for-visual-studio-2019
|
|
+set MAX_INDEX_CMAKE_GENERATOR=0
|
|
|
|
-set "CMAKE_GENERATOR[0]=Visual Studio 14 2015%ARCH_POSTFIX%"
|
|
-set "CMAKE_GENERATOR[1]=Visual Studio 15 2017%ARCH_POSTFIX%"
|
|
-set "CMAKE_GENERATOR[2]=Visual Studio 16 2019"
|
|
+set "CMAKE_GENERATOR[0]=Visual Studio 16 2019"
|
|
|
|
-set "CMAKE_GENERATOR_ARCHITECTURE[0]="
|
|
-set "CMAKE_GENERATOR_ARCHITECTURE[1]="
|
|
-set "CMAKE_GENERATOR_ARCHITECTURE[2]=%GEN_ARCH%"
|
|
+set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%"
|
|
|
|
-set "CMAKE_GENERATOR_TOOLSET[0]=host %PreferredToolArchitecture%"
|
|
-set "CMAKE_GENERATOR_TOOLSET[1]=host %PreferredToolArchitecture%"
|
|
-set "CMAKE_GENERATOR_TOOLSET[2]=v141"
|
|
+set "CMAKE_GENERATOR_TOOLSET[0]=v142"
|
|
|
|
REM Reduce build times and package size by removing unused stuff
|
|
REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015
|
|
set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^
|
|
- -DLLVM_INCLUDE_TESTS=OFF ^
|
|
+ -DLLVM_ENABLE_PROJECTS:STRING=lld ^
|
|
-DLLVM_INCLUDE_UTILS=ON ^
|
|
-DLLVM_INCLUDE_DOCS=OFF ^
|
|
-DLLVM_INCLUDE_EXAMPLES=OFF ^
|
|
@@ -67,7 +64,7 @@ for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do (
|
|
-DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^
|
|
-DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^
|
|
-DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^
|
|
- %CMAKE_CUSTOM% "%SRC_DIR%"
|
|
+ %CMAKE_CUSTOM% "%SRC_DIR%\llvm"
|
|
if not errorlevel 1 goto configuration_successful
|
|
del CMakeCache.txt
|
|
)
|
|
diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh
|
|
index fd99eee90..2cc8464c6 100644
|
|
--- a/conda-recipes/llvmdev/build.sh
|
|
+++ b/conda-recipes/llvmdev/build.sh
|
|
@@ -15,10 +15,14 @@ else
|
|
DARWIN_TARGET=x86_64-apple-darwin13.4.0
|
|
fi
|
|
|
|
+mv llvm-*.src llvm
|
|
+mv lld-*.src lld
|
|
+mv unwind/libunwind-*.src libunwind
|
|
|
|
declare -a _cmake_config
|
|
_cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX})
|
|
_cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release)
|
|
+_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld")
|
|
# The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here
|
|
# _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON)
|
|
_cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON)
|
|
@@ -27,6 +31,7 @@ _cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON)
|
|
_cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF)
|
|
# Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to.
|
|
_cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF)
|
|
+_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF)
|
|
# Sometimes these are reported as unused. Whatever.
|
|
_cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF)
|
|
_cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF)
|
|
@@ -39,10 +44,10 @@ _cmake_config+=(-DLLVM_ENABLE_RTTI=OFF)
|
|
_cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD})
|
|
_cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly)
|
|
_cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit
|
|
+_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project
|
|
# TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to
|
|
# disable these unnecessary but useful things.
|
|
if [[ ${CONDA_FORGE} == yes ]]; then
|
|
- _cmake_config+=(-DLLVM_INCLUDE_TESTS=OFF)
|
|
_cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF)
|
|
_cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF)
|
|
fi
|
|
@@ -76,7 +81,7 @@ cd build
|
|
|
|
cmake -G'Unix Makefiles' \
|
|
"${_cmake_config[@]}" \
|
|
- ..
|
|
+ ../llvm
|
|
|
|
ARCH=`uname -m`
|
|
if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling
|
|
@@ -85,6 +90,8 @@ else
|
|
make -j${CPU_COUNT} VERBOSE=1
|
|
fi
|
|
|
|
+make check-llvm-unit || exit $?
|
|
+
|
|
# From: https://github.com/conda-forge/llvmdev-feedstock/pull/53
|
|
make install || exit $?
|
|
|
|
@@ -93,10 +100,3 @@ if [[ $ARCH == 'x86_64' ]]; then
|
|
bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $?
|
|
fi
|
|
|
|
-# run the tests, skip some on linux-32
|
|
-cd ../test
|
|
-if [[ $ARCH == 'i686' ]]; then
|
|
- ../build/bin/llvm-lit -vv Transforms Analysis CodeGen/X86
|
|
-else
|
|
- ../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86
|
|
-fi
|
|
diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml
|
|
index 27b596ffc..1a8e67032 100644
|
|
--- a/conda-recipes/llvmdev/meta.yaml
|
|
+++ b/conda-recipes/llvmdev/meta.yaml
|
|
@@ -1,8 +1,9 @@
|
|
-{% set shortversion = "11.1" %}
|
|
-{% set version = "11.1.0" %}
|
|
-{% set sha256_llvm = "ce8508e318a01a63d4e8b3090ab2ded3c598a50258cc49e2625b9120d4c03ea5" %}
|
|
-{% set sha256_lld = "017a788cbe1ecc4a949abf10755870519086d058a2e99f438829aef24f0c66ce" %}
|
|
-{% set build_number = "5" %}
|
|
+{% set shortversion = "14.0" %}
|
|
+{% set version = "14.0.6" %}
|
|
+{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %}
|
|
+{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %}
|
|
+{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %}
|
|
+{% set build_number = "0" %}
|
|
|
|
package:
|
|
name: llvmdev
|
|
@@ -13,20 +14,16 @@ source:
|
|
fn: llvm-{{ version }}.src.tar.xz
|
|
sha256: {{ sha256_llvm }}
|
|
patches:
|
|
- - ../partial-testing.patch
|
|
- # Intel SVML optimizations (two patches)
|
|
- - ../intel-D47188-svml-VF.patch
|
|
- # Second patch from https://github.com/conda-forge/llvmdev-feedstock/blob/c706309/recipe/patches/expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
|
|
- - ../expect-fastmath-entrypoints-in-add-TLI-mappings.ll.patch
|
|
- # Reverts a patch limiting non-GlobalValue name length
|
|
- - ../0001-Revert-Limit-size-of-non-GlobalValue-name.patch
|
|
- # Fixes for aarch64 on LLVM 11 from https://reviews.llvm.org/D104123
|
|
- - ../llvm_11_consecutive_registers.patch
|
|
-
|
|
+ - ../llvm14-remove-use-of-clonefile.patch
|
|
+ - ../llvm14-svml.patch
|
|
- url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz
|
|
fn: lld-{{ version }}.src.tar.xz
|
|
sha256: {{ sha256_lld }}
|
|
- folder: tools/lld
|
|
+
|
|
+ - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz
|
|
+ fn: libunwind-{{ version }}.src.tar.xz
|
|
+ sha256: {{ sha256_libunwind }}
|
|
+ folder: unwind
|
|
|
|
build:
|
|
number: {{ build_number }}
|
|
@@ -81,5 +78,5 @@ about:
|
|
home: http://llvm.org/
|
|
dev_url: https://github.com/llvm-mirror/llvm
|
|
license: NCSA
|
|
- license_file: LICENSE.TXT
|
|
+ license_file: llvm/LICENSE.TXT
|
|
summary: Development headers and libraries for LLVM
|
|
diff --git a/conda-recipes/llvmlite/bld.bat b/conda-recipes/llvmlite/bld.bat
|
|
index 475a0637c..d7342e249 100755
|
|
--- a/conda-recipes/llvmlite/bld.bat
|
|
+++ b/conda-recipes/llvmlite/bld.bat
|
|
@@ -12,11 +12,8 @@ if "%ARCH%"=="32" (
|
|
@rem set CMAKE_GENERATOR_ARCH=Win64
|
|
set CMAKE_GENERATOR_ARCH=x64
|
|
)
|
|
-@rem for older VS:
|
|
-@rem set CMAKE_GENERATOR=Visual Studio 15 2017
|
|
-@rem do not set CMAKE_GENERATOR_TOOLKIT
|
|
set CMAKE_GENERATOR=Visual Studio 16 2019
|
|
-set CMAKE_GENERATOR_TOOLKIT=v141
|
|
+set CMAKE_GENERATOR_TOOLKIT=v142
|
|
|
|
@rem Ensure there are no build leftovers (CMake can complain)
|
|
if exist ffi\build rmdir /S /Q ffi\build
|
|
diff --git a/conda-recipes/llvmlite/meta.yaml b/conda-recipes/llvmlite/meta.yaml
|
|
index bf083becd..ff897f7af 100644
|
|
--- a/conda-recipes/llvmlite/meta.yaml
|
|
+++ b/conda-recipes/llvmlite/meta.yaml
|
|
@@ -1,4 +1,4 @@
|
|
-{% set VERSION_SUFFIX = "" %} # debug version suffix, appended to the version
|
|
+{% set VERSION_SUFFIX = "llvm14" %} # debug version suffix, appended to the version
|
|
|
|
package:
|
|
name: llvmlite
|
|
@@ -23,19 +23,19 @@ requirements:
|
|
# build.sh deals with it!
|
|
- {{ compiler('c') }} # [not (osx or armv6l or armv7l or win)]
|
|
- {{ compiler('cxx') }} # [not (osx or armv6l or armv7l or win)]
|
|
- - vs2017_{{ target_platform }} # [win]
|
|
+ - vs2015_{{ target_platform }} # [win]
|
|
# The DLL build uses cmake on Windows
|
|
- cmake # [win]
|
|
- make # [unix and not (armv6l or armv7l or aarch64)]
|
|
host:
|
|
- python
|
|
# On channel https://anaconda.org/numba/
|
|
- - llvmdev 11.1.0 *5 # [(osx and arm64)]
|
|
- - llvmdev 11.1.0 *4 # [not ((osx and arm64) or win)]
|
|
- - llvmdev 11.1.0 4 # [win]
|
|
+ - llvmdev 14
|
|
- vs2015_runtime # [win]
|
|
# llvmdev is built with libz compression support
|
|
- zlib # [unix and not (armv6l or armv7l)]
|
|
+ # requires libxml2
|
|
+ - libxml2 # [win]
|
|
run:
|
|
- python >=3.8,<3.10
|
|
- vs2015_runtime # [win]
|
|
diff --git a/ffi/Makefile.freebsd b/ffi/Makefile.freebsd
|
|
index ba727e331..7b869e876 100644
|
|
--- a/ffi/Makefile.freebsd
|
|
+++ b/ffi/Makefile.freebsd
|
|
@@ -1,5 +1,5 @@
|
|
|
|
-CXX = clang++ -std=c++11 -stdlib=libc++
|
|
+CXX = clang++ -stdlib=libc++
|
|
|
|
# -flto and --exclude-libs allow us to remove those parts of LLVM we don't use
|
|
CXX_FLTO_FLAGS ?= -flto
|
|
diff --git a/ffi/Makefile.osx b/ffi/Makefile.osx
|
|
index bc192071e..74dccf32c 100644
|
|
--- a/ffi/Makefile.osx
|
|
+++ b/ffi/Makefile.osx
|
|
@@ -1,6 +1,6 @@
|
|
|
|
-CXX = clang++ -std=c++11 -stdlib=libc++
|
|
-CXXFLAGS = $(LLVM_CXXFLAGS)
|
|
+CXX = clang++
|
|
+CXXFLAGS = $(LLVM_CXXFLAGS) -O3
|
|
# Only export the LLVMPY symbols we require and exclude everything else.
|
|
EXPORT = "-Wl,-exported_symbol,_LLVMPY_*"
|
|
LDFLAGS := $(LDFLAGS) $(EXPORT) $(LLVM_LDFLAGS)
|
|
diff --git a/ffi/build.py b/ffi/build.py
|
|
index 55343fca5..e58a691e0 100755
|
|
--- a/ffi/build.py
|
|
+++ b/ffi/build.py
|
|
@@ -72,10 +72,10 @@ def find_windows_generator():
|
|
)
|
|
|
|
generators.extend([
|
|
- # use VS2017 toolkit on VS2019 to match how llvmdev is built
|
|
- ('Visual Studio 16 2019', ('x64' if is_64bit else 'Win32'), 'v141'),
|
|
- # This is the generator configuration for VS2017
|
|
- ('Visual Studio 15 2017' + (' Win64' if is_64bit else ''), None, None)
|
|
+ # use VS2019 to match how llvmdev is built
|
|
+ ('Visual Studio 16 2019', ('x64' if is_64bit else 'Win32'), 'v142'),
|
|
+ # # This is the generator configuration for VS2017
|
|
+ # ('Visual Studio 15 2017' + (' Win64' if is_64bit else ''), None, None)
|
|
])
|
|
for generator in generators:
|
|
build_dir = tempfile.mkdtemp()
|
|
@@ -163,9 +163,10 @@ def main_posix(kind, library_ext):
|
|
print(msg)
|
|
print(warning + '\n')
|
|
else:
|
|
-
|
|
- if not out.startswith('11'):
|
|
- msg = ("Building llvmlite requires LLVM 11.x.x, got "
|
|
+ (version, _) = out.split('.', 1)
|
|
+ version = int(version)
|
|
+ if version < 11 or version > 14:
|
|
+ msg = ("Building llvmlite requires LLVM 11, 12, 13, or 14, got "
|
|
"{!r}. Be sure to set LLVM_CONFIG to the right executable "
|
|
"path.\nRead the documentation at "
|
|
"http://llvmlite.pydata.org/ for more information about "
|
|
diff --git a/ffi/passmanagers.cpp b/ffi/passmanagers.cpp
|
|
index dd67ca5cc..60064cf10 100644
|
|
--- a/ffi/passmanagers.cpp
|
|
+++ b/ffi/passmanagers.cpp
|
|
@@ -16,11 +16,8 @@
|
|
|
|
#include "llvm-c/Transforms/IPO.h"
|
|
#include "llvm-c/Transforms/Scalar.h"
|
|
-#include "llvm/IR/LegacyPassManager.h"
|
|
-#if LLVM_VERSION_MAJOR > 11
|
|
-#include "llvm/IR/RemarkStreamer.h"
|
|
-#endif
|
|
#include "llvm/IR/LLVMRemarkStreamer.h"
|
|
+#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/Remarks/RemarkStreamer.h"
|
|
#include "llvm/Transforms/IPO.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
@@ -220,7 +217,11 @@ LLVMPY_AddLazyValueInfoPass(LLVMPassManagerRef PM) {
|
|
}
|
|
API_EXPORT(void)
|
|
LLVMPY_AddLintPass(LLVMPassManagerRef PM) {
|
|
+#if LLVM_VERSION_MAJOR < 12
|
|
unwrap(PM)->add(llvm::createLintPass());
|
|
+#else
|
|
+ unwrap(PM)->add(llvm::createLintLegacyPassPass());
|
|
+#endif
|
|
}
|
|
API_EXPORT(void)
|
|
LLVMPY_AddModuleDebugInfoPrinterPass(LLVMPassManagerRef PM) {
|
|
diff --git a/ffi/targets.cpp b/ffi/targets.cpp
|
|
index 3b5abf510..b96d22c9f 100644
|
|
--- a/ffi/targets.cpp
|
|
+++ b/ffi/targets.cpp
|
|
@@ -6,7 +6,11 @@
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Type.h"
|
|
#include "llvm/Support/Host.h"
|
|
+#if LLVM_VERSION_MAJOR > 13
|
|
+#include "llvm/MC/TargetRegistry.h"
|
|
+#else
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
+#endif
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
#include <cstdio>
|
|
@@ -204,7 +208,11 @@ LLVMPY_CreateTargetMachine(LLVMTargetRef T, const char *Triple, const char *CPU,
|
|
rm = Reloc::DynamicNoPIC;
|
|
|
|
TargetOptions opt;
|
|
+#if LLVM_VERSION_MAJOR < 12
|
|
opt.PrintMachineCode = PrintMC;
|
|
+#else
|
|
+ opt.MCOptions.ShowMCInst = PrintMC;
|
|
+#endif
|
|
opt.MCOptions.ABIName = ABIName;
|
|
|
|
bool jit = JIT;
|
|
diff --git a/ffi/value.cpp b/ffi/value.cpp
|
|
index 771acd423..01871699d 100644
|
|
--- a/ffi/value.cpp
|
|
+++ b/ffi/value.cpp
|
|
@@ -153,8 +153,13 @@ LLVMPY_ArgumentAttributesIter(LLVMValueRef A) {
|
|
using namespace llvm;
|
|
Argument *arg = unwrap<Argument>(A);
|
|
unsigned argno = arg->getArgNo();
|
|
- AttributeSet attrs =
|
|
- arg->getParent()->getAttributes().getParamAttributes(argno);
|
|
+ const AttributeSet attrs = arg->getParent()->getAttributes().
|
|
+#if LLVM_VERSION_MAJOR < 14
|
|
+ getParamAttributes(argno)
|
|
+#else
|
|
+ getParamAttrs(argno)
|
|
+#endif
|
|
+ ;
|
|
return wrap(new AttributeSetIterator(attrs.begin(), attrs.end()));
|
|
}
|
|
|
|
@@ -353,7 +358,11 @@ LLVMPY_GetElementType(LLVMTypeRef type) {
|
|
llvm::Type *unwrapped = llvm::unwrap(type);
|
|
llvm::PointerType *ty = llvm::dyn_cast<llvm::PointerType>(unwrapped);
|
|
if (ty != nullptr) {
|
|
+#if LLVM_VERSION_MAJOR < 14
|
|
return llvm::wrap(ty->getElementType());
|
|
+#else
|
|
+ return llvm::wrap(ty->getPointerElementType());
|
|
+#endif
|
|
}
|
|
return nullptr;
|
|
}
|
|
diff --git a/llvmlite/binding/passmanagers.py b/llvmlite/binding/passmanagers.py
|
|
index 26f7bd259..4b9daf468 100644
|
|
--- a/llvmlite/binding/passmanagers.py
|
|
+++ b/llvmlite/binding/passmanagers.py
|
|
@@ -199,7 +199,8 @@ def add_lint_pass(self):
|
|
"""
|
|
See https://llvm.org/docs/Passes.html#lint-statically-lint-checks-llvm-ir
|
|
|
|
- LLVM 11+: `llvm::createLintPass`
|
|
+ LLVM 11: `llvm::createLintPass`
|
|
+ LLVM 12+: `llvm::createLintLegacyPassPass`
|
|
""" # noqa E501
|
|
ffi.lib.LLVMPY_AddLintPass(self)
|
|
|
|
diff --git a/llvmlite/tests/test_binding.py b/llvmlite/tests/test_binding.py
|
|
index dc4dbc484..70902e04c 100644
|
|
--- a/llvmlite/tests/test_binding.py
|
|
+++ b/llvmlite/tests/test_binding.py
|
|
@@ -640,7 +640,7 @@ def test_set_option(self):
|
|
def test_version(self):
|
|
major, minor, patch = llvm.llvm_version_info
|
|
# one of these can be valid
|
|
- valid = [(11,)]
|
|
+ valid = [(11,), (12, ), (13, ), (14, )]
|
|
self.assertIn((major,), valid)
|
|
self.assertIn(patch, range(10))
|
|
|
|
From 355338e931f488926b07a2f6eaf83ecd39e9abb7 Mon Sep 17 00:00:00 2001
|
|
From: Andre Masella <andre@masella.name>
|
|
Date: Thu, 8 Dec 2022 17:26:05 -0500
|
|
Subject: [PATCH] Automatically detect common return blocks in ref prune
|
|
|
|
Change reference pruning algorithm to detect when a common return block is
|
|
generated and determine if it returns non-zero, indicating an exception path.
|
|
LLVM 14 automatically generates code like this.
|
|
---
|
|
ffi/custom_passes.cpp | 83 ++++++++++++++++++++++++---------
|
|
llvmlite/tests/test_refprune.py | 25 +++++++++-
|
|
2 files changed, 85 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/ffi/custom_passes.cpp b/ffi/custom_passes.cpp
|
|
index 21e0bbcff..a04b4b64c 100644
|
|
--- a/ffi/custom_passes.cpp
|
|
+++ b/ffi/custom_passes.cpp
|
|
@@ -905,9 +905,7 @@ struct RefPrunePass : public FunctionPass {
|
|
}
|
|
|
|
/**
|
|
- * Check if a basic block is a block which raises, this relies on a
|
|
- * metadata "ret_is_raise" being present the terminator and the
|
|
- * terminator opcode being Instruction::Ret.
|
|
+ * Check if a basic block is a block which raises, based on the return value.
|
|
*
|
|
* Parameters:
|
|
* - bb a basic block
|
|
@@ -920,27 +918,68 @@ struct RefPrunePass : public FunctionPass {
|
|
|
|
// Get the terminator
|
|
auto term = bb->getTerminator();
|
|
- // Get the opcode of the terminator, if it's not a Ret then return false
|
|
- if (term->getOpcode() != Instruction::Ret)
|
|
- return false;
|
|
- // Get the metadata on the terminator node
|
|
- auto md = term->getMetadata("ret_is_raise");
|
|
- // If there's no metadata return false (normal or unmarked Ret)
|
|
- if (!md)
|
|
- return false;
|
|
- // If the number of operands on the metadata is not 1 then return false
|
|
- if (md->getNumOperands() != 1)
|
|
+ // Get the opcode of the terminator, if it's a Ret then check
|
|
+ if (term->getOpcode() == Instruction::Ret) {
|
|
+ // With one operand
|
|
+ if (term->getNumOperands() != 1) {
|
|
+ return false;
|
|
+ }
|
|
+ auto operand = term->getOperand(0);
|
|
+ // If the operand is a constant, check if it indicates an exception
|
|
+ auto int_operand = dyn_cast<ConstantInt>(operand);
|
|
+ if (int_operand && int_operand->isOneValue()) {
|
|
+ return true;
|
|
+ }
|
|
+ // If the operand is a PHI node, check if there is a non-exception
|
|
+ // path. We don't know which path we're on, but since the
|
|
+ // exceptional path will lookahead, so if there is a non-exceptional
|
|
+            // path, we can assume we're on it.
|
|
+ auto phi_operand = dyn_cast<PHINode>(operand);
|
|
+ if (phi_operand) {
|
|
+ for (auto& phi_arg_value : phi_operand->incoming_values()) {
|
|
+ auto arg_value = dyn_cast<ConstantInt>(phi_arg_value);
|
|
+ if (arg_value && !arg_value->isOneValue()) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ return true;
|
|
+ }
|
|
return false;
|
|
- // Fetch the ref to the metadata operand at location 0
|
|
- auto &operand = md->getOperand(0);
|
|
- // and then cast the const as Metadata (Numba sets this as literal 1)
|
|
- auto data = dyn_cast<ConstantAsMetadata>(operand.get());
|
|
- // If dyn_cast failed type check then return false
|
|
- if (!data)
|
|
+ } else if (term->getOpcode() == Instruction::Br &&
|
|
+ term->getNumOperands() == 1) {
|
|
+ // If it's a branch, it might be a common return block
|
|
+ auto first =
|
|
+ term->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(true);
|
|
+ if (!first) {
|
|
+ // Malformed block with no terminal instruction
|
|
+ return false;
|
|
+ }
|
|
+ // Our one and only instruction should be a return
|
|
+ if (first->getOpcode() != Instruction::Ret) {
|
|
+ return false;
|
|
+ }
|
|
+ // With one operand
|
|
+ if (first->getNumOperands() != 1) {
|
|
+ return false;
|
|
+ }
|
|
+ auto operand = first->getOperand(0);
|
|
+ // If the operand is a constant, check if it indicates an exception
|
|
+ auto int_operand = dyn_cast<ConstantInt>(operand);
|
|
+ if (int_operand && int_operand->isOneValue()) {
|
|
+ return true;
|
|
+ }
|
|
+ // If the operand is a PHI node, check if the path we're on will
|
|
+ // yield a value indicating an exception
|
|
+ auto phi_operand = dyn_cast<PHINode>(operand);
|
|
+ if (phi_operand) {
|
|
+ auto arg_value = dyn_cast<ConstantInt>(
|
|
+ phi_operand->getIncomingValueForBlock(bb));
|
|
+ return arg_value && arg_value->isOneValue();
|
|
+ }
|
|
+ // This path doesn't raise
|
|
return false;
|
|
- // get the value of the casted metadata and then return bool on whether
|
|
- // it is the number one.
|
|
- return data->getValue()->isOneValue();
|
|
+ }
|
|
+ return false;
|
|
}
|
|
|
|
/**
|
|
diff --git a/llvmlite/tests/test_refprune.py b/llvmlite/tests/test_refprune.py
|
|
index d4f7b3035..ba53ab1ff 100644
|
|
--- a/llvmlite/tests/test_refprune.py
|
|
+++ b/llvmlite/tests/test_refprune.py
|
|
@@ -456,7 +456,8 @@ def test_fanout_raise_1(self):
|
|
|
|
def test_fanout_raise_2(self):
|
|
mod, stats = self.check(self.fanout_raise_2)
|
|
- self.assertEqual(stats.fanout_raise, 0)
|
|
+ # Change in behaviour: ignore bad metadata
|
|
+ self.assertEqual(stats.fanout_raise, 2)
|
|
|
|
fanout_raise_3 = r"""
|
|
define i32 @main(i8* %ptr, i1 %cond) {
|
|
@@ -495,6 +496,28 @@ def test_fanout_raise_4(self):
|
|
mod, stats = self.check(self.fanout_raise_4)
|
|
self.assertEqual(stats.fanout_raise, 0)
|
|
|
|
+ fanout_raise_5 = r"""
|
|
+define i32 @main(i8* %ptr, i1 %cond) {
|
|
+bb_A:
|
|
+ call void @NRT_incref(i8* %ptr)
|
|
+ br i1 %cond, label %bb_B, label %bb_C
|
|
+bb_B:
|
|
+ call void @NRT_decref(i8* %ptr)
|
|
+ br label %common.ret
|
|
+bb_C:
|
|
+ br label %common.ret ; pretend we throw an exception
|
|
+common.ret:
|
|
+ %common.ret.op = phi i32 [ 0, %bb_B ], [ 1, %bb_C ]
|
|
+ ret i32 %common.ret.op
|
|
+}
|
|
+
|
|
+!0 = !{i32 1}
|
|
+"""
|
|
+
|
|
+ def test_fanout_raise_5(self):
|
|
+ mod, stats = self.check(self.fanout_raise_5)
|
|
+ self.assertEqual(stats.fanout_raise, 2)
|
|
+
|
|
|
|
if __name__ == '__main__':
|
|
unittest.main()
|