From 65bcb0318b2ea316f4bb578becc877051f61a122 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Fri, 28 Jun 2024 15:16:37 +0100
Subject: [PATCH 1/7] Use LLVM 15 by default, add experimental LLVM 16 support

Changes required to use LLVM 15 by default, and to support LLVM 16
experimentally, include:

CI config:

- Bump all LLVM 14 configurations to use LLVM 15
- Bump the LLVM 15 configurations to use LLVM 16

llvmlite:

- Always set opaque pointers to false (they are the default in both 15
  and 16, unlike 14).
- Don't initialize `ObjCARCOpts` in LLVM 16 and above. This was removed
  in LLVM 16 by:

```
commit 4153f989bab0f2f300fa8d3001ebeef7b6d9672c
Author: Arthur Eubanks
Date:   Sun Oct 2 13:20:21 2022 -0700

    [ObjCARC] Remove legacy PM versions of optimization passes
```

- Remove the `AggressiveInstCombine` and `PruneEH` passes. These were
  removed from the legacy pass manager in LLVM 16 by:

```
commit 70dc3b811e4926fa2c88bd3b53b29c46fcba1a90
Author: Arthur Eubanks
Date:   Mon Oct 31 14:50:38 2022 -0700

    [AggressiveInstCombine] Remove legacy PM pass
```

and

```
commit 46fc75ab28b78a730ea21fd7daba6443937bfaac
Author: Sebastian Peryt
Date:   Mon Sep 26 18:31:32 2022 -0700

    [NFC][2/n] Remove PrunePH pass
```

- Modify `reserveAllocationSpace` in the memory manager to use `Align`
  for the type of alignments in LLVM 16 - this mirrors an upstream
  change.
- Remove LLVM 14-specific code paths (and one vestigial LLVM < 9 path).
- Update the function attributes test to recognize the new form of
  memory attributes - `memory()` as opposed to individual attributes
  like `readonly`. See:
  https://releases.llvm.org/16.0.0/docs/LangRef.html#function-attributes
---
 azure-pipelines.yml                           |  8 ++--
 buildscripts/azure/azure-windows.yml          |  4 +-
 buildscripts/incremental/build.cmd            |  2 +-
 .../incremental/setup_conda_environment.cmd   |  4 +-
 .../incremental/setup_conda_environment.sh    |  6 +--
 ffi/build.py                                  |  8 ++--
 ffi/core.cpp                                  |  4 --
 ffi/initfini.cpp                              |  2 +
 ffi/memorymanager.cpp                         | 39 ++++++++++++-------
 ffi/memorymanager.h                           | 15 +++++--
 ffi/orcjit.cpp                                |  9 -----
 ffi/passmanagers.cpp                          | 26 ++-----------
 ffi/targets.cpp                               |  1 +
 ffi/value.cpp                                 |  2 -
 llvmlite/binding/passmanagers.py              | 35 ++++++++++-------
 llvmlite/tests/test_binding.py                | 34 ++++++++--------
 16 files changed, 98 insertions(+), 101 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 7e40fdedd..75fec8cd3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -18,10 +18,10 @@ jobs:
           PYTHON: '3.12'
           CONDA_ENV: cienv
 
-        llvm15:
+        llvm16:
           PYTHON: '3.12'
           CONDA_ENV: cienv
-          LLVM: '15'
+          LLVM: '16'
 
   - template: buildscripts/azure/azure-linux-macos.yml
     parameters:
@@ -67,10 +67,10 @@ jobs:
           CONDA_ENV: cienv
           WHEEL: 'yes'
 
-        llvm15:
+        llvm16:
           PYTHON: '3.12'
           CONDA_ENV: cienv
-          LLVM: '15'
+          LLVM: '16'
 
   - template: buildscripts/azure/azure-windows.yml
     parameters:
diff --git a/buildscripts/azure/azure-windows.yml b/buildscripts/azure/azure-windows.yml
index fd61bb7a8..5f00cf9c3 100644
--- a/buildscripts/azure/azure-windows.yml
+++ b/buildscripts/azure/azure-windows.yml
@@ -22,10 +22,10 @@ jobs:
         PYTHON: '3.12'
         CONDA_ENV: cienv
 
-      llvm15:
+      llvm16:
         PYTHON: '3.12'
         CONDA_ENV: cienv
-        LLVM: '15'
+        LLVM: '16'
 
 
   steps:
diff --git a/buildscripts/incremental/build.cmd b/buildscripts/incremental/build.cmd
index 182cdde8f..fb3221895 100644
--- a/buildscripts/incremental/build.cmd
+++ b/buildscripts/incremental/build.cmd
@@ -15,7 +15,7 @@ call activate %CONDA_ENV%
 @rem - https://github.com/conda-forge/llvmdev-feedstock/issues/175
 @rem - https://github.com/conda-forge/llvmdev-feedstock/pull/223
@rem - https://github.com/MicrosoftDocs/visualstudio-docs/issues/7774 -if "%LLVM%"=="15" ( +if "%LLVM%"=="16" ( call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\VsDevCmd.bat" if %errorlevel% neq 0 exit /b %errorlevel% ) diff --git a/buildscripts/incremental/setup_conda_environment.cmd b/buildscripts/incremental/setup_conda_environment.cmd index afd47c5f2..ac572f3a7 100644 --- a/buildscripts/incremental/setup_conda_environment.cmd +++ b/buildscripts/incremental/setup_conda_environment.cmd @@ -14,10 +14,10 @@ call activate %CONDA_ENV% if %errorlevel% neq 0 exit /b %errorlevel% @rem Install llvmdev -if "%LLVM%"=="15" ( +if "%LLVM%"=="16" ( set LLVMDEV_CHANNEL="conda-forge" ) else ( - set LLVMDEV_CHANNEL="numba/label/dev" + set LLVMDEV_CHANNEL="numba" ) call conda install -y -q -c %LLVMDEV_CHANNEL% llvmdev="%LLVM%" libxml2 diff --git a/buildscripts/incremental/setup_conda_environment.sh b/buildscripts/incremental/setup_conda_environment.sh index 7dd431848..eec2c0fe7 100755 --- a/buildscripts/incremental/setup_conda_environment.sh +++ b/buildscripts/incremental/setup_conda_environment.sh @@ -27,10 +27,10 @@ source activate $CONDA_ENV set -v # Install llvmdev (separate channel, for now) -if [ "$LLVM" == "15" ]; then - $CONDA_INSTALL -c conda-forge llvmdev="15" +if [ "$LLVM" == "16" ]; then + $CONDA_INSTALL -c conda-forge llvmdev="16" else - $CONDA_INSTALL -c numba/label/dev llvmdev="14.*" + $CONDA_INSTALL -c numba llvmdev="15.*" fi # Install the compiler toolchain, for osx, bootstrapping needed diff --git a/ffi/build.py b/ffi/build.py index 97bdda426..61d68e3bc 100755 --- a/ffi/build.py +++ b/ffi/build.py @@ -167,13 +167,13 @@ def main_posix(kind, library_ext): else: (version, _) = out.split('.', 1) version = int(version) - if version == 15: - msg = ("Building with LLVM 15; note that LLVM 15 support is " + if version == 16: + msg = ("Building with LLVM 16; note that LLVM 16 support is " "presently experimental") show_warning(msg) - elif version != 14: + elif version != 15: - msg = ("Building llvmlite requires LLVM 14, got " + msg = ("Building llvmlite requires LLVM 15, got " "{!r}. 
Be sure to set LLVM_CONFIG to the right executable " "path.\nRead the documentation at " "http://llvmlite.pydata.org/ for more information about " diff --git a/ffi/core.cpp b/ffi/core.cpp index 0a4c5bb10..92fc09d87 100644 --- a/ffi/core.cpp +++ b/ffi/core.cpp @@ -23,18 +23,14 @@ LLVMPY_DisposeString(const char *msg) { free(const_cast(msg)); } API_EXPORT(LLVMContextRef) LLVMPY_GetGlobalContext() { auto context = LLVMGetGlobalContext(); -#if LLVM_VERSION_MAJOR > 14 LLVMContextSetOpaquePointers(context, false); -#endif return context; } API_EXPORT(LLVMContextRef) LLVMPY_ContextCreate() { LLVMContextRef context = LLVMContextCreate(); -#if LLVM_VERSION_MAJOR > 14 LLVMContextSetOpaquePointers(context, false); -#endif return context; } diff --git a/ffi/initfini.cpp b/ffi/initfini.cpp index ae13d292a..dc05e6724 100644 --- a/ffi/initfini.cpp +++ b/ffi/initfini.cpp @@ -15,7 +15,9 @@ extern "C" { INIT(Core) INIT(TransformUtils) INIT(ScalarOpts) +#if LLVM_VERSION_MAJOR < 16 INIT(ObjCARCOpts) +#endif INIT(Vectorization) INIT(InstCombine) INIT(IPO) diff --git a/ffi/memorymanager.cpp b/ffi/memorymanager.cpp index 3163e2430..3f3ee79c7 100644 --- a/ffi/memorymanager.cpp +++ b/ffi/memorymanager.cpp @@ -129,20 +129,23 @@ bool LlvmliteMemoryManager::hasSpace(const MemoryGroup &MemGroup, return false; } -void LlvmliteMemoryManager::reserveAllocationSpace( - uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, - uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) { +void LlvmliteMemoryManager::reserveAllocationSpace(uintptr_t CodeSize, + LLVMLITE_ALIGN CodeAlign, + uintptr_t RODataSize, + LLVMLITE_ALIGN RODataAlign, + uintptr_t RWDataSize, + LLVMLITE_ALIGN RWDataAlign) { LLVM_DEBUG( dbgs() << "\nLlvmliteMemoryManager::reserveAllocationSpace() request:\n\n"); LLVM_DEBUG(dbgs() << "Code size / align: " << format_hex(CodeSize, 2, true) - << " / " << CodeAlign << "\n"); + << " / " << GET_ALIGN_VALUE(CodeAlign) << "\n"); LLVM_DEBUG(dbgs() << "ROData size / align: " - << format_hex(RODataSize, 2, true) << " / " << RODataAlign - << "\n"); + << format_hex(RODataSize, 2, true) << " / " + << GET_ALIGN_VALUE(RODataAlign) << "\n"); LLVM_DEBUG(dbgs() << "RWData size / align: " - << format_hex(RWDataSize, 2, true) << " / " << RWDataAlign - << "\n"); + << format_hex(RWDataSize, 2, true) << " / " + << GET_ALIGN_VALUE(RWDataAlign) << "\n"); if (CodeSize == 0 && RODataSize == 0 && RWDataSize == 0) { LLVM_DEBUG(dbgs() << "No memory requested - returning early.\n"); @@ -152,23 +155,31 @@ void LlvmliteMemoryManager::reserveAllocationSpace( // Code alignment needs to be at least the stub alignment - however, we // don't have an easy way to get that here so as a workaround, we assume // it's 8, which is the largest value I observed across all platforms. +#if LLVM_VERSION_MAJOR < 16 constexpr uint32_t StubAlign = 8; - CodeAlign = std::max(CodeAlign, StubAlign); +#else + constexpr uint64_t StubAlign = 8; +#endif + + CodeAlign = LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(CodeAlign), StubAlign)); // ROData and RWData may not need to be aligned to the StubAlign, but the // stub alignment seems like a reasonable (if slightly arbitrary) minimum // alignment for them that should not cause any issues on all (i.e. 64-bit) // platforms. 
- RODataAlign = std::max(RODataAlign, StubAlign); - RWDataAlign = std::max(RWDataAlign, StubAlign); + RODataAlign = + LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(RODataAlign), StubAlign)); + RWDataAlign = + LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(RWDataAlign), StubAlign)); // Get space required for each section. Use the same calculation as // allocateSection because we need to be able to satisfy it. - uintptr_t RequiredCodeSize = alignTo(CodeSize, CodeAlign) + CodeAlign; + uintptr_t RequiredCodeSize = + alignTo(CodeSize, CodeAlign) + GET_ALIGN_VALUE(CodeAlign); uintptr_t RequiredRODataSize = - alignTo(RODataSize, RODataAlign) + RODataAlign; + alignTo(RODataSize, RODataAlign) + GET_ALIGN_VALUE(RODataAlign); uintptr_t RequiredRWDataSize = - alignTo(RWDataSize, RWDataAlign) + RWDataAlign; + alignTo(RWDataSize, RWDataAlign) + GET_ALIGN_VALUE(RWDataAlign); uint64_t TotalSize = RequiredCodeSize + RequiredRODataSize + RequiredRWDataSize; diff --git a/ffi/memorymanager.h b/ffi/memorymanager.h index 9ed028542..c0bdddaab 100644 --- a/ffi/memorymanager.h +++ b/ffi/memorymanager.h @@ -42,6 +42,14 @@ class __attribute__((visibility("default"))) ErrorInfoBase; #include #include +#if LLVM_VERSION_MAJOR < 16 +#define LLVMLITE_ALIGN uint32_t +#define GET_ALIGN_VALUE(align) align +#else +#define LLVMLITE_ALIGN Align +#define GET_ALIGN_VALUE(align) align.value() +#endif + namespace llvm { /// This is a simple memory manager which implements the methods called by @@ -174,11 +182,12 @@ class API_EXPORT(LlvmliteMemoryManager : public RTDyldMemoryManager) { virtual bool needsToReserveAllocationSpace() override { return true; } - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + virtual void reserveAllocationSpace(uintptr_t CodeSize, + LLVMLITE_ALIGN CodeAlign, uintptr_t RODataSize, - uint32_t RODataAlign, + LLVMLITE_ALIGN RODataAlign, uintptr_t RWDataSize, - uint32_t RWDataAlign) override; + LLVMLITE_ALIGN RWDataAlign) override; private: struct FreeMemBlock { diff --git a/ffi/orcjit.cpp b/ffi/orcjit.cpp index 60a4d8f3a..e19d6b97d 100644 --- a/ffi/orcjit.cpp +++ b/ffi/orcjit.cpp @@ -158,12 +158,7 @@ LLVMPY_LLJITLookup(std::shared_ptr *lljit, const char *dylib_name, return nullptr; } -#if LLVM_VERSION_MAJOR > 14 *addr = sym->getValue(); -#else - *addr = sym->getAddress(); -#endif - return new JITDylibTracker(*lljit, *dylib, std::move(dylib->createResourceTracker())); } @@ -339,11 +334,7 @@ LLVMPY_LLJIT_Link(std::shared_ptr *lljit, const char *libraryName, LLVMDisposeErrorMessage(message); return nullptr; } -#if LLVM_VERSION_MAJOR > 14 exports[export_idx].address = lookup->getValue(); -#else - exports[export_idx].address = lookup->getAddress(); -#endif } return new JITDylibTracker(*lljit, *dylib, std::move(dylib->getDefaultResourceTracker())); diff --git a/ffi/passmanagers.cpp b/ffi/passmanagers.cpp index da4a076b4..3e3c1c1bb 100644 --- a/ffi/passmanagers.cpp +++ b/ffi/passmanagers.cpp @@ -162,13 +162,8 @@ LLVMPY_AddCallGraphDOTPrinterPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddDotDomPrinterPass(LLVMPassManagerRef PM, bool showBody) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(showBody ? llvm::createDomPrinterWrapperPassPass() : llvm::createDomOnlyPrinterWrapperPassPass()); -#else - unwrap(PM)->add(showBody ? 
llvm::createDomPrinterPass() - : llvm::createDomOnlyPrinterPass()); -#endif } API_EXPORT(void) @@ -178,13 +173,8 @@ LLVMPY_AddGlobalsModRefAAPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddDotPostDomPrinterPass(LLVMPassManagerRef PM, bool showBody) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(showBody ? llvm::createPostDomPrinterWrapperPassPass() : llvm::createPostDomOnlyPrinterWrapperPassPass()); -#else - unwrap(PM)->add(showBody ? llvm::createPostDomPrinterPass() - : llvm::createPostDomOnlyPrinterPass()); -#endif } API_EXPORT(void) @@ -255,13 +245,6 @@ LLVMPY_AddAlwaysInlinerPass(LLVMPassManagerRef PM, bool insertLifetime) { unwrap(PM)->add(llvm::createAlwaysInlinerLegacyPass(insertLifetime)); } -#if LLVM_VERSION_MAJOR < 15 -API_EXPORT(void) -LLVMPY_AddArgPromotionPass(LLVMPassManagerRef PM, unsigned int maxElements) { - unwrap(PM)->add(llvm::createArgumentPromotionPass(maxElements)); -} -#endif - API_EXPORT(void) LLVMPY_AddBreakCriticalEdgesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(llvm::createBreakCriticalEdgesPass()); @@ -293,10 +276,12 @@ LLVMPY_AddDeadCodeEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadCodeEliminationPass()); } +#if LLVM_VERSION_MAJOR < 16 API_EXPORT(void) LLVMPY_AddAggressiveInstructionCombiningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAggressiveInstCombinerPass()); } +#endif API_EXPORT(void) LLVMPY_AddInternalizePass(LLVMPassManagerRef PM) { @@ -349,12 +334,7 @@ LLVMPY_AddLoopUnrollAndJamPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddLoopUnswitchPass(LLVMPassManagerRef PM, bool optimizeForSize, bool hasBranchDivergence) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(createSimpleLoopUnswitchLegacyPass(optimizeForSize)); -#else - unwrap(PM)->add( - createLoopUnswitchPass(optimizeForSize, hasBranchDivergence)); -#endif } API_EXPORT(void) @@ -392,10 +372,12 @@ LLVMPY_AddPartialInliningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPartialInliningPass()); } +#if LLVM_VERSION_MAJOR < 16 API_EXPORT(void) LLVMPY_AddPruneExceptionHandlingPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } +#endif API_EXPORT(void) LLVMPY_AddReassociatePass(LLVMPassManagerRef PM) { diff --git a/ffi/targets.cpp b/ffi/targets.cpp index 1b1bbf9f1..da7ba521c 100644 --- a/ffi/targets.cpp +++ b/ffi/targets.cpp @@ -1,6 +1,7 @@ #include "core.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/ffi/value.cpp b/ffi/value.cpp index 2f86806bd..b59e33ae0 100644 --- a/ffi/value.cpp +++ b/ffi/value.cpp @@ -6,9 +6,7 @@ // the following is needed for WriteGraph() #include "llvm/Analysis/CFGPrinter.h" -#if LLVM_VERSION_MAJOR > 14 #include "llvm/Support/GraphWriter.h" -#endif /* An iterator around a attribute list, including the stop condition */ struct AttributeListIterator { diff --git a/llvmlite/binding/passmanagers.py b/llvmlite/binding/passmanagers.py index af6152f63..92aa06738 100644 --- a/llvmlite/binding/passmanagers.py +++ b/llvmlite/binding/passmanagers.py @@ -1,4 +1,4 @@ -from ctypes import (c_bool, c_char_p, c_int, c_size_t, c_uint, Structure, byref, +from ctypes import (c_bool, c_char_p, c_int, c_size_t, Structure, byref, POINTER) from collections import namedtuple from enum import IntFlag @@ -8,11 +8,11 @@ from tempfile import mkstemp from llvmlite.binding.common import _encode_string +llvm_version_major = llvm_version_info[0] + _prunestats = 
namedtuple('PruneStats', ('basicblock diamond fanout fanout_raise')) -llvm_version_major = llvm_version_info[0] - class PruneStats(_prunestats): """ Holds statistics from reference count pruning. @@ -261,9 +261,7 @@ def add_arg_promotion_pass(self, max_elements=3): LLVM 14: `llvm::createArgumentPromotionPass` """ # noqa E501 - if llvm_version_major > 14: - raise RuntimeError('ArgumentPromotionPass unavailable in LLVM > 14') - ffi.lib.LLVMPY_AddArgPromotionPass(self, max_elements) + raise RuntimeError('ArgumentPromotionPass unavailable in LLVM > 14') def add_break_critical_edges_pass(self): """ @@ -342,6 +340,10 @@ def add_aggressive_instruction_combining_pass(self): LLVM 14: `llvm::createAggressiveInstCombinerPass` """ # noqa E501 + if llvm_version_major > 15: + msg = "AggressiveInstrCombinerPass unavailable in LLVM > 15" + raise RuntimeError(msg) + ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass(self) def add_internalize_pass(self): @@ -538,6 +540,8 @@ def add_prune_exception_handling_pass(self): LLVM 14: `llvm::createPruneEHPass` """ # noqa E501 + if llvm_version_major > 15: + raise RuntimeError("PruneEHPass unavailable in LLVM > 15") ffi.lib.LLVMPY_AddPruneExceptionHandlingPass(self) def add_reassociate_expressions_pass(self): @@ -871,18 +875,16 @@ def run_with_remarks(self, function, remarks_format='yaml', ffi.lib.LLVMPY_AddScalarEvolutionAAPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddAggressiveDCEPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddAlwaysInlinerPass.argtypes = [ffi.LLVMPassManagerRef, c_bool] - -if llvm_version_major < 15: - ffi.lib.LLVMPY_AddArgPromotionPass.argtypes = [ - ffi.LLVMPassManagerRef, c_uint] - ffi.lib.LLVMPY_AddBreakCriticalEdgesPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddDeadStoreEliminationPass.argtypes = [ ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddReversePostOrderFunctionAttrsPass.argtypes = [ ffi.LLVMPassManagerRef] -ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass.argtypes = [ - ffi.LLVMPassManagerRef] + +if llvm_version_major < 16: + ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass.argtypes = [ + ffi.LLVMPassManagerRef] + ffi.lib.LLVMPY_AddInternalizePass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddLCSSAPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddLoopDeletionPass.argtypes = [ffi.LLVMPassManagerRef] @@ -901,7 +903,12 @@ def run_with_remarks(self, function, remarks_format='yaml', ffi.lib.LLVMPY_AddMergeFunctionsPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddMergeReturnsPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddPartialInliningPass.argtypes = [ffi.LLVMPassManagerRef] -ffi.lib.LLVMPY_AddPruneExceptionHandlingPass.argtypes = [ffi.LLVMPassManagerRef] + +if llvm_version_major < 16: + ffi.lib.LLVMPY_AddPruneExceptionHandlingPass.argtypes = [ + ffi.LLVMPassManagerRef + ] + ffi.lib.LLVMPY_AddReassociatePass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddDemoteRegisterToMemoryPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddSinkPass.argtypes = [ffi.LLVMPassManagerRef] diff --git a/llvmlite/tests/test_binding.py b/llvmlite/tests/test_binding.py index cfa6bdeae..0dfbeede9 100644 --- a/llvmlite/tests/test_binding.py +++ b/llvmlite/tests/test_binding.py @@ -18,8 +18,6 @@ from llvmlite.binding import ffi from llvmlite.tests import TestCase -llvm_version_major = llvm.llvm_version_info[0] - # arvm7l needs extra ABI symbols to link successfully if platform.machine() == 'armv7l': llvm.load_library_permanently('libgcc_s.so.1') @@ -883,7 +881,7 @@ def 
test_set_option(self): def test_version(self): major, minor, patch = llvm.llvm_version_info # one of these can be valid - valid = (14, 15) + valid = (15, 16) self.assertIn(major, valid) self.assertIn(patch, range(8)) @@ -1082,13 +1080,9 @@ def test_parse_bitcode_error(self): with self.assertRaises(RuntimeError) as cm: llvm.parse_bitcode(b"") self.assertIn("LLVM bitcode parsing error", str(cm.exception)) - # for llvm < 9 - if llvm.llvm_version_info[0] < 9: - self.assertIn("Invalid bitcode signature", str(cm.exception)) - else: - self.assertIn( - "file too small to contain bitcode header", str(cm.exception), - ) + self.assertIn( + "file too small to contain bitcode header", str(cm.exception), + ) def test_bitcode_roundtrip(self): # create a new context to avoid struct renaming @@ -1709,11 +1703,13 @@ def test_instruction_operands(self): self.assertEqual(str(operands[1].type), 'i32') def test_function_attributes(self): + ver = llvm.llvm_version_info[0] + readonly_attrs = [b'memory(read)' if ver == 16 else b'readonly'] mod = self.module(asm_attributes) for func in mod.functions: attrs = list(func.attributes) if func.name == 'a_readonly_func': - self.assertEqual(attrs, [b'readonly']) + self.assertEqual(attrs, readonly_attrs) elif func.name == 'a_arg0_return_func': self.assertEqual(attrs, []) args = list(func.arguments) @@ -2462,6 +2458,8 @@ def pm(self): return llvm.create_module_pass_manager() def test_populate(self): + llvm_ver = llvm.llvm_version_info[0] + pm = self.pm() pm.add_target_library_info("") # unspecified target triple pm.add_constant_merge_pass() @@ -2486,12 +2484,13 @@ def test_populate(self): pm.add_aggressive_dead_code_elimination_pass() pm.add_aa_eval_pass() pm.add_always_inliner_pass() - if llvm_version_major < 15: - pm.add_arg_promotion_pass(42) pm.add_break_critical_edges_pass() pm.add_dead_store_elimination_pass() pm.add_reverse_post_order_function_attrs_pass() - pm.add_aggressive_instruction_combining_pass() + + if llvm_ver < 16: + pm.add_aggressive_instruction_combining_pass() + pm.add_internalize_pass() pm.add_jump_threading_pass(7) pm.add_lcssa_pass() @@ -2502,8 +2501,6 @@ def test_populate(self): pm.add_loop_simplification_pass() pm.add_loop_unroll_pass() pm.add_loop_unroll_and_jam_pass() - if llvm_version_major < 15: - pm.add_loop_unswitch_pass() pm.add_lower_atomic_pass() pm.add_lower_invoke_pass() pm.add_lower_switch_pass() @@ -2511,7 +2508,10 @@ def test_populate(self): pm.add_merge_functions_pass() pm.add_merge_returns_pass() pm.add_partial_inlining_pass() - pm.add_prune_exception_handling_pass() + + if llvm_ver < 16: + pm.add_prune_exception_handling_pass() + pm.add_reassociate_expressions_pass() pm.add_demote_register_to_memory_pass() pm.add_sink_pass() From d75760d228b16b5587a634c2cf335f3a29dc6f3d Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:15:37 +0100 Subject: [PATCH 2/7] Try setting CMake C++ standard to 17 to fix Windows LLVM 16 build --- ffi/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ffi/CMakeLists.txt b/ffi/CMakeLists.txt index 907b1e1ec..c1b7b2315 100755 --- a/ffi/CMakeLists.txt +++ b/ffi/CMakeLists.txt @@ -11,6 +11,8 @@ if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -g") endif() +set(CMAKE_CXX_STANDARD 17) + # Work around llvm/llvm-project#83802 - LLVM's Findzstd.cmake uses variables # that require including `GNUInstallDirs`, but it does not include it itself. 
include(GNUInstallDirs) From f3564f3c27bf8e0c14c0fb404f32b99435463bd5 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:41:15 +0100 Subject: [PATCH 3/7] Use LLVM 15 on RTD --- .readthedocs.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 43152be07..9b42aaaeb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,6 +8,11 @@ build: os: ubuntu-22.04 tools: python: "3.11" + apt_packages: + - llvm-15 + jobs: + pre_build: + - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 sphinx: configuration: docs/source/conf.py From 587c885f7701ec40637ebbf42e496d1b5adfc641 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:48:19 +0100 Subject: [PATCH 4/7] Set LLVM version to 15 in post_system_dependencies on RTD --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9b42aaaeb..b729cfff7 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,7 @@ build: apt_packages: - llvm-15 jobs: - pre_build: + post_system_dependencies: - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 sphinx: From 29fcbb86ed857f42ffec1ed4013f9e4406fc9973 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 15:09:10 +0100 Subject: [PATCH 5/7] Try editing build.py on RTD instead --- .readthedocs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b729cfff7..9892bd5eb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,8 +11,8 @@ build: apt_packages: - llvm-15 jobs: - post_system_dependencies: - - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 + post_checkout: + - sed -i "s/'llvm-config'/'llvm-config-15'/g" ffi/build.py sphinx: configuration: docs/source/conf.py From 98452e2a63b286133b3d712c304e485ca289c666 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 15 Jul 2024 11:55:15 +0100 Subject: [PATCH 6/7] Remove llvmdev 14 recipe Replace with llvmdev recipe for LLVM 15. 
--- conda-recipes/llvm14-clear-gotoffsetmap.patch | 31 - .../llvm14-remove-use-of-clonefile.patch | 54 - conda-recipes/llvm14-svml.patch | 2194 ----------------- conda-recipes/llvmdev/bld.bat | 135 +- conda-recipes/llvmdev/build.sh | 172 +- conda-recipes/llvmdev/conda_build_config.yaml | 8 + conda-recipes/llvmdev/meta.yaml | 62 +- ...std-module-for-shared-DLL-on-Windows.patch | 0 .../patches/no-windows-symlinks.patch | 0 conda-recipes/llvmdev_llvm15/bld.bat | 59 - conda-recipes/llvmdev_llvm15/build.sh | 114 - .../llvmdev_llvm15/conda_build_config.yaml | 20 - conda-recipes/llvmdev_llvm15/meta.yaml | 77 - conda-recipes/llvmdev_llvm15/numba-3016.ll | 80 - 14 files changed, 179 insertions(+), 2827 deletions(-) delete mode 100644 conda-recipes/llvm14-clear-gotoffsetmap.patch delete mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch delete mode 100644 conda-recipes/llvm14-svml.patch rename conda-recipes/{llvmdev_llvm15 => llvmdev}/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch (100%) rename conda-recipes/{llvmdev_llvm15 => llvmdev}/patches/no-windows-symlinks.patch (100%) delete mode 100644 conda-recipes/llvmdev_llvm15/bld.bat delete mode 100644 conda-recipes/llvmdev_llvm15/build.sh delete mode 100644 conda-recipes/llvmdev_llvm15/conda_build_config.yaml delete mode 100644 conda-recipes/llvmdev_llvm15/meta.yaml delete mode 100644 conda-recipes/llvmdev_llvm15/numba-3016.ll diff --git a/conda-recipes/llvm14-clear-gotoffsetmap.patch b/conda-recipes/llvm14-clear-gotoffsetmap.patch deleted file mode 100644 index 239f4ab20..000000000 --- a/conda-recipes/llvm14-clear-gotoffsetmap.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 322c79fff224389b4df9f24ac22965867007c2fa Mon Sep 17 00:00:00 2001 -From: Graham Markall -Date: Mon, 13 Mar 2023 21:35:11 +0000 -Subject: [PATCH] RuntimeDyldELF: Clear the GOTOffsetMap when finalizing the - load - -This needs resetting so that stale entries are not left behind when the -GOT section and index are reset. - -See llvm/llvm#61402: RuntimeDyldELF doesn't clear GOTOffsetMap in -finalizeLoad(), leading to invalid GOT relocations on AArch64 - -https://github.com/llvm/llvm-project/issues/61402. 
---- - llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -index f92618afdff6..eb3c27a9406a 100644 ---- a/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -+++ b/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -@@ -2345,6 +2345,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, - } - } - -+ GOTOffsetMap.clear(); - GOTSectionID = 0; - CurrentGOTIndex = 0; - --- -2.34.1 - diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch deleted file mode 100644 index 6ef9c9d61..000000000 --- a/conda-recipes/llvm14-remove-use-of-clonefile.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc ---- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-03-14 05:44:55.000000000 -0400 -+++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-09-19 11:30:59.000000000 -0400 -@@ -1462,6 +1462,7 @@ - std::error_code copy_file(const Twine &From, const Twine &To) { - std::string FromS = From.str(); - std::string ToS = To.str(); -+ /* - #if __has_builtin(__builtin_available) - if (__builtin_available(macos 10.12, *)) { - // Optimistically try to use clonefile() and handle errors, rather than -@@ -1490,6 +1491,7 @@ - // cheaper. - } - #endif -+ */ - if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA)) - return std::error_code(); - return std::error_code(errno, std::generic_category()); -diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp ---- a/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-03-14 05:44:55.000000000 -0400 -+++ b/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-09-19 11:33:07.000000000 -0400 -@@ -2267,15 +2267,15 @@ - - EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError); - EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe)); -- -+#if !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError); - EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe)); -- -+#endif - // Modern BSDs require root to set the sticky bit on files. - // AIX and Solaris without root will mask off (i.e., lose) the sticky bit - // on files. 
- #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && \ -- !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) -+ !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError); - EXPECT_TRUE(CheckPermissions(fs::sticky_bit)); - -@@ -2297,10 +2297,12 @@ - EXPECT_TRUE(CheckPermissions(fs::all_perms)); - #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX - -+#if !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit), - NoError); - EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit)); - #endif -+#endif - } - - #ifdef _WIN32 diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch deleted file mode 100644 index c753d3f59..000000000 --- a/conda-recipes/llvm14-svml.patch +++ /dev/null @@ -1,2194 +0,0 @@ -From 9de32f5474f1f78990b399214bdbb6c21f8f098e Mon Sep 17 00:00:00 2001 -From: Ivan Butygin -Date: Sun, 24 Jul 2022 20:31:29 +0200 -Subject: [PATCH] Fixes vectorizer and extends SVML support - -Fixes vectorizer and extends SVML support -Patch was updated to fix SVML calling convention issues uncovered by llvm 10. -In previous versions of patch SVML calling convention was selected based on -compilation settings. So if you try to call 256bit vector function from avx512 -code function will be called with avx512 cc which is incorrect. To fix this -SVML cc was separated into 3 different cc for 128, 256 and 512bit vector lengths -which are selected based on actual input vector length. - -Original patch merged several fixes: - -1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls -to SVML library as it has non-standard calling conventions. So accordingly it -has SVML calling conventions definitions and code to set CC to the vectorized -calls. As SVML provides several implementations for the math functions we also -took into consideration fast attribute and select more fast implementation in -such case. This work is based on original Matt Masten's work. -Author: Denis Nagorny - -2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML -calls by breaking down the illegal vector call instruction into multiple legal -vector call instructions during code generation. Currently the vectorizer does -not check legality of the generated SVML (or any VECLIB) call instructions, and -this can lead to potential problems even during vector type legalization. This -patch addresses this issue by adding a legality check during code generation and -replaces the illegal SVML call with corresponding legalized instructions. -(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html) -Author: Karthik Senthil - -diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -index 17d1e3f770c14..110ff08189867 100644 ---- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -+++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -@@ -39,6 +39,12 @@ struct VecDesc { - NotLibFunc - }; - -+enum SVMLAccuracy { -+ SVML_DEFAULT, -+ SVML_HA, -+ SVML_EP -+}; -+ - /// Implementation of the target library information. - /// - /// This class constructs tables that hold the target library information and -@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl { - /// Return true if the function F has a vector equivalent with vectorization - /// factor VF. 
- bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { -- return !getVectorizedFunction(F, VF).empty(); -+ return !getVectorizedFunction(F, VF, false).empty(); - } - - /// Return true if the function F has a vector equivalent with any -@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl { - - /// Return the name of the equivalent of F, vectorized with factor VF. If no - /// such mapping exists, return the empty string. -- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const; -+ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const; -+ -+ Optional getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const; - - /// Set to true iff i32 parameters to library functions should have signext - /// or zeroext attributes if they correspond to C-level int or unsigned int, -@@ -326,8 +335,13 @@ class TargetLibraryInfo { - bool isFunctionVectorizable(StringRef F) const { - return Impl->isFunctionVectorizable(F); - } -- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { -- return Impl->getVectorizedFunction(F, VF); -+ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const { -+ return Impl->getVectorizedFunction(F, VF, IsFast); -+ } -+ -+ Optional getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { -+ return Impl->getVectorizedFunctionCallingConv(F, FTy, DL); - } - - /// Tests if the function is both available and a candidate for optimized code -diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -index 78ebb35e0ea4d..3ffb57db8b18b 100644 ---- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -+++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -@@ -133,6 +133,9 @@ enum Kind { - kw_fastcc, - kw_coldcc, - kw_intel_ocl_bicc, -+ kw_intel_svmlcc128, -+ kw_intel_svmlcc256, -+ kw_intel_svmlcc512, - kw_cfguard_checkcc, - kw_x86_stdcallcc, - kw_x86_fastcallcc, -diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -index 0498fc269b634..23bb3de41bc1a 100644 ---- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -+++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) - tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) - tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) - add_public_tablegen_target(intrinsics_gen) -+ -+set(LLVM_TARGET_DEFINITIONS SVML.td) -+tablegen(LLVM SVML.inc -gen-svml) -+add_public_tablegen_target(svml_gen) -diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -index fd28542465225..096eea1a8e19b 100644 ---- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -+++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -@@ -252,6 +252,11 @@ namespace CallingConv { - /// M68k_INTR - Calling convention used for M68k interrupt routines. - M68k_INTR = 101, - -+ /// Intel_SVML - Calling conventions for Intel Short Math Vector Library -+ Intel_SVML128 = 102, -+ Intel_SVML256 = 103, -+ Intel_SVML512 = 104, -+ - /// The highest possible calling convention ID. Must be some 2^k - 1. 
- MaxID = 1023 - }; -diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td -new file mode 100644 -index 0000000000000..5af710404c9d9 ---- /dev/null -+++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td -@@ -0,0 +1,62 @@ -+//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file is used by TableGen to define the different typs of SVML function -+// variants used with -fveclib=SVML. -+// -+//===----------------------------------------------------------------------===// -+ -+class SvmlVariant; -+ -+def sin : SvmlVariant; -+def cos : SvmlVariant; -+def pow : SvmlVariant; -+def exp : SvmlVariant; -+def log : SvmlVariant; -+def acos : SvmlVariant; -+def acosh : SvmlVariant; -+def asin : SvmlVariant; -+def asinh : SvmlVariant; -+def atan2 : SvmlVariant; -+def atan : SvmlVariant; -+def atanh : SvmlVariant; -+def cbrt : SvmlVariant; -+def cdfnorm : SvmlVariant; -+def cdfnorminv : SvmlVariant; -+def cosd : SvmlVariant; -+def cosh : SvmlVariant; -+def erf : SvmlVariant; -+def erfc : SvmlVariant; -+def erfcinv : SvmlVariant; -+def erfinv : SvmlVariant; -+def exp10 : SvmlVariant; -+def exp2 : SvmlVariant; -+def expm1 : SvmlVariant; -+def hypot : SvmlVariant; -+def invsqrt : SvmlVariant; -+def log10 : SvmlVariant; -+def log1p : SvmlVariant; -+def log2 : SvmlVariant; -+def sind : SvmlVariant; -+def sinh : SvmlVariant; -+def sqrt : SvmlVariant; -+def tan : SvmlVariant; -+def tanh : SvmlVariant; -+ -+// TODO: SVML does not currently provide _ha and _ep variants of these fucnctions. -+// We should call the default variant of these functions in all cases instead. -+ -+// def nearbyint : SvmlVariant; -+// def logb : SvmlVariant; -+// def floor : SvmlVariant; -+// def fmod : SvmlVariant; -+// def ceil : SvmlVariant; -+// def trunc : SvmlVariant; -+// def rint : SvmlVariant; -+// def round : SvmlVariant; -diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -index aec84124129f4..98286e166fbe2 100644 ---- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -+++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis - DEPENDS - intrinsics_gen - ${MLDeps} -+ svml_gen - - LINK_LIBS - ${MLLinkDeps} -diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -index 02923c2c7eb14..83abde28a62a4 100644 ---- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -+++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) { - F->getFunctionType()); - } - -+static std::string svmlMangle(StringRef FnName, const bool IsFast) { -+ std::string FullName = FnName.str(); -+ return IsFast ? FullName : FullName + "_ha"; -+} -+ - /// Initialize the set of available library functions based on the specified - /// target triple. This should be carefully written so that a missing target - /// triple gets a sane set of defaults. 
-@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( - } - case SVML: { - const VecDesc VecFuncs[] = { -- #define TLI_DEFINE_SVML_VECFUNCS -- #include "llvm/Analysis/VecFuncs.def" -+ #define GET_SVML_VARIANTS -+ #include "llvm/IR/SVML.inc" -+ #undef GET_SVML_VARIANTS - }; - addVectorizableFunctions(VecFuncs); - break; -@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { - return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; - } - --StringRef --TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -- const ElementCount &VF) const { -+std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -+ const ElementCount &VF, -+ bool IsFast) const { -+ bool FromSVML = ClVectorLibrary == SVML; - F = sanitizeFunctionName(F); - if (F.empty()) -- return F; -+ return F.str(); - std::vector::const_iterator I = - llvm::lower_bound(VectorDescs, F, compareWithScalarFnName); - while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { -- if (I->VectorizationFactor == VF) -- return I->VectorFnName; -+ if (I->VectorizationFactor == VF) { -+ if (FromSVML) { -+ return svmlMangle(I->VectorFnName, IsFast); -+ } -+ return I->VectorFnName.str(); -+ } - ++I; - } -- return StringRef(); -+ return std::string(); -+} -+ -+static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType) -+{ -+ assert(isa(FType.getReturnType())); -+ auto *VecCallRetType = cast(FType.getReturnType()); -+ auto TypeBitWidth = DL.getTypeSizeInBits(VecCallRetType); -+ if (TypeBitWidth == 128) { -+ return CallingConv::Intel_SVML128; -+ } else if (TypeBitWidth == 256) { -+ return CallingConv::Intel_SVML256; -+ } else if (TypeBitWidth == 512) { -+ return CallingConv::Intel_SVML512; -+ } else { -+ llvm_unreachable("Invalid vector width"); -+ } -+ return 0; // not reachable -+} -+ -+Optional -+TargetLibraryInfoImpl::getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { -+ if (F.startswith("__svml")) { -+ return getSVMLCallingConv(DL, FTy); -+ } -+ return {}; - } - - TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F, -diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -index e3bf41c9721b6..4f9dccd4e0724 100644 ---- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -+++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() { - KEYWORD(spir_kernel); - KEYWORD(spir_func); - KEYWORD(intel_ocl_bicc); -+ KEYWORD(intel_svmlcc128); -+ KEYWORD(intel_svmlcc256); -+ KEYWORD(intel_svmlcc512); - KEYWORD(x86_64_sysvcc); - KEYWORD(win64cc); - KEYWORD(x86_regcallcc); -diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -index 432ec151cf8ae..3bd6ee61024b8 100644 ---- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -+++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { - /// ::= 'ccc' - /// ::= 'fastcc' - /// ::= 'intel_ocl_bicc' -+/// ::= 'intel_svmlcc128' -+/// ::= 'intel_svmlcc256' -+/// ::= 'intel_svmlcc512' - /// ::= 'coldcc' - /// ::= 'cfguard_checkcc' - /// ::= 'x86_stdcallcc' -@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { - case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; - case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; - case lltok::kw_intel_ocl_bicc: CC = 
CallingConv::Intel_OCL_BI; break; -+ case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break; -+ case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break; -+ case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break; - case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; - case lltok::kw_win64cc: CC = CallingConv::Win64; break; - case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; -diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -index 0ff045fa787e8..175651949ef85 100644 ---- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -+++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, - // and the exact vector width of the call operands in the - // TargetLibraryInfo. - const std::string TLIName = -- std::string(TLI.getVectorizedFunction(ScalarName, VF)); -+ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); - - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" - << ScalarName << "` and vector width " << VF << ".\n"); -diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -index 179754e275b03..c4e95752c97e8 100644 ---- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -+++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { - case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; - case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break; - case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; -+ case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break; -+ case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break; -+ case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break; - case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; - case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; - case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; -diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp -index 989d01e2e3950..bae7382a36e13 100644 ---- a/llvm-14.0.6.src/lib/IR/Verifier.cpp -+++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp -@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) { - case CallingConv::Fast: - case CallingConv::Cold: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: - case CallingConv::PTX_Kernel: - case CallingConv::PTX_Device: - Assert(!F.isVarArg(), "Calling convention does not support varargs or " -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -index 4dd8a6cdd8982..12e65521215e4 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -+++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[ - CCDelegateTo - ]>; - -+// Intel_SVML return-value convention. -+def RetCC_Intel_SVML : CallingConv<[ -+ // Vector types are returned in XMM0,XMM1 -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0,XMM1]>>, -+ -+ // 256-bit FP vectors -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0,YMM1]>>, -+ -+ // 512-bit FP vectors -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0,ZMM1]>> -+]>; -+ - // This is the return-value convention used for the entire X86 backend. 
- let Entry = 1 in - def RetCC_X86 : CallingConv<[ -@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[ - // Check if this is the Intel OpenCL built-ins calling convention - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, - -+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, -+ - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[ - CCDelegateTo - ]>; - -+// X86-64 Intel Short Vector Math Library calling convention. -+def CC_Intel_SVML : CallingConv<[ -+ -+ // The SSE vector arguments are passed in XMM registers. -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0, XMM1, XMM2]>>, -+ -+ // The 256-bit vector arguments are passed in YMM registers. -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0, YMM1, YMM2]>>, -+ -+ // The 512-bit vector arguments are passed in ZMM registers. -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>> -+]>; -+ -+def CC_X86_32_Intr : CallingConv<[ -+ CCAssignToStack<4, 4> -+]>; -+ -+def CC_X86_64_Intr : CallingConv<[ -+ CCAssignToStack<8, 8> -+]>; -+ - //===----------------------------------------------------------------------===// - // X86 Root Argument Calling Conventions - //===----------------------------------------------------------------------===// -@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[ - let Entry = 1 in - def CC_X86 : CallingConv<[ - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, - (sequence "R%u", 12, 15))>; - def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, - (sequence "XMM%u", 8, 15))>; -+ -+// SVML calling convention -+def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>; -+def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML, -+ K4, K5, K6, K7)>; -+ -+def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>; -+ -+def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 16, 31), -+ K4, K5, K6, K7)>; -+def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 6, 21), -+ K4, K5, K6, K7)>; -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -index 8bb7e81e19bbd..1780ce3fc6467 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { - // FIXME: Only some x86_32 calling conventions support AVX512. 
- if (Subtarget.useAVX512Regs() && - (is64Bit() || (CallConv == CallingConv::X86_VectorCall || -- CallConv == CallingConv::Intel_OCL_BI))) -+ CallConv == CallingConv::Intel_OCL_BI || -+ CallConv == CallingConv::Intel_SVML512))) - VecVT = MVT::v16f32; - else if (Subtarget.hasAVX()) - VecVT = MVT::v8f32; -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -index 130cb61cdde24..9eec3b25ca9f2 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -+++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - } - } - -+namespace { -+std::pair getSVMLRegMaskAndSaveList( -+ bool Is64Bit, bool IsWin64, CallingConv::ID CC) { -+ assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512); -+ unsigned Abi = CC - CallingConv::Intel_SVML128 ; // 0 - 128, 1 - 256, 2 - 512 -+ -+ const std::pair Abi64[] = { -+ std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList), -+ std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList), -+ std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ const std::pair AbiWin64[] = { -+ std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList), -+ std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList), -+ std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ const std::pair Abi32[] = { -+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), -+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), -+ std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ if (Is64Bit) { -+ if (IsWin64) { -+ return AbiWin64[Abi]; -+ } else { -+ return Abi64[Abi]; -+ } -+ } else { -+ return Abi32[Abi]; -+ } -+} -+} -+ - const MCPhysReg * - X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - assert(MF && "MachineFunction required"); -@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR_64_Intel_OCL_BI_SaveList; - break; - } -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: { -+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second; -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_SaveList; - case CallingConv::X86_RegCall: -@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, - return CSR_64_Intel_OCL_BI_RegMask; - break; - } -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: { -+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first; -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_RegMask; - case CallingConv::X86_RegCall: -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -index 5d773f0c57dfb..6bdf5bc6f3fe9 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -+++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { - case CallingConv::X86_ThisCall: - case CallingConv::X86_VectorCall: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: - return isTargetWin64(); - // This convention allows using the Win64 convention 
on other targets. - case CallingConv::Win64: -diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -index 047bf5569ded3..59897785f156c 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { - - auto AddVariantDecl = [&](const ElementCount &VF) { - const std::string TLIName = -- std::string(TLI.getVectorizedFunction(ScalarName, VF)); -+ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); - if (!TLIName.empty()) { - std::string MangledName = - VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF); -diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -index 46ff0994e04e7..f472af5e1a835 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -@@ -712,6 +712,27 @@ class InnerLoopVectorizer { - virtual void printDebugTracesAtStart(){}; - virtual void printDebugTracesAtEnd(){}; - -+ /// Check legality of given SVML call instruction \p VecCall generated for -+ /// scalar call \p Call. If illegal then the appropriate legal instruction -+ /// is returned. -+ Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call); -+ -+ /// Returns the legal VF for a call instruction \p CI using TTI information -+ /// and vector type. -+ ElementCount getLegalVFForCall(CallInst *CI); -+ -+ /// Partially vectorize a given call \p Call by breaking it down into multiple -+ /// calls of \p LegalCall, decided by the variant VF \p LegalVF. -+ Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall, -+ unsigned LegalVF); -+ -+ /// Generate shufflevector instruction for a vector value \p V based on the -+ /// current \p Part and a smaller VF \p LegalVF. -+ Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part); -+ -+ /// Combine partially vectorized calls stored in \p CallResults. -+ Value *combinePartialVecCalls(SmallVectorImpl &CallResults); -+ - /// The original loop. 
- Loop *OrigLoop; - -@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) { - return !CInt || CInt->isZero(); - } - -+static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL, -+ const TargetLibraryInfo &TLI) { -+ Function *VectorF = CI.getCalledFunction(); -+ FunctionType *FTy = VectorF->getFunctionType(); -+ StringRef VFName = VectorF->getName(); -+ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL); -+ if (CC) { -+ CI.setCallingConv(*CC); -+ } -+} -+ - void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, - VPUser &ArgOperands, - VPTransformState &State) { -@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, - if (isa(V)) - V->copyFastMathFlags(CI); - -+ const DataLayout &DL = V->getModule()->getDataLayout(); -+ setVectorFunctionCallingConv(*V, DL, *TLI); -+ -+ // Perform legalization of SVML call instruction only if original call -+ // was not Intrinsic -+ if (!UseVectorIntrinsic && -+ (V->getCalledFunction()->getName()).startswith("__svml")) { -+ // assert((V->getCalledFunction()->getName()).startswith("__svml")); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump()); -+ auto *LegalV = cast(legalizeSVMLCall(V, CI)); -+ LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: "; -+ LegalV->dump()); -+ State.set(Def, LegalV, Part); -+ addMetadata(LegalV, &I); -+ } else { - State.set(Def, V, Part); - addMetadata(V, &I); -+ } -+ } -+} -+ -+//===----------------------------------------------------------------------===// -+// Implementation of functions for SVML vector call legalization. -+//===----------------------------------------------------------------------===// -+// -+// Unlike other VECLIBs, SVML needs to be used with target-legal -+// vector types. Otherwise, link failures and/or runtime failures -+// will occur. A motivating example could be - -+// -+// double *a; -+// float *b; -+// #pragma clang loop vectorize_width(8) -+// for(i = 0; i < N; ++i) { -+// a[i] = sin(i); // Legal SVML VF must be 4 or below on AVX -+// b[i] = cosf(i); // VF can be 8 on AVX since 8 floats can fit in YMM -+// } -+// -+// Current implementation of vector code generation in LV is -+// driven based on a single VF (in InnerLoopVectorizer::VF). This -+// inhibits the flexibility of adjusting/choosing different VF -+// for different instructions. -+// -+// Due to this limitation it is much more straightforward to -+// first generate the illegal sin8 (svml_sin8 for SVML vector -+// library) call and then legalize it than trying to avoid -+// generating illegal code from the beginning. -+// -+// A solution for this problem is to check legality of the -+// call instruction right after generating it in vectorizer and -+// if it is illegal we split the call arguments and issue multiple -+// calls to match the legal VF. This is demonstrated currently for -+// the SVML vector library calls (non-intrinsic version only). -+// -+// Future directions and extensions: -+// 1) This legalization example shows us that a good direction -+// for the VPlan framework would be to model the vector call -+// instructions in a way that legal VF for each call is chosen -+// correctly within vectorizer and illegal code generation is -+// avoided. -+// 2) This logic can also be extended to general vector functions -+// i.e. legalization OpenMP decalre simd functions. The -+// requirements needed for this will be documented soon. 
-+ -+Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall, -+ CallInst *Call) { -+ ElementCount LegalVF = getLegalVFForCall(VecCall); -+ -+ assert(LegalVF.getKnownMinValue() > 1 && -+ "Legal VF for SVML call must be greater than 1 to vectorize"); -+ -+ if (LegalVF == VF) -+ return VecCall; -+ else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue()) -+ // TODO: handle case when we are underfilling vectors -+ return VecCall; -+ -+ // Legal VF for this SVML call is smaller than chosen VF, break it down into -+ // smaller call instructions -+ -+ // Convert args, types and return type to match legal VF -+ SmallVector NewTys; -+ SmallVector NewArgs; -+ -+ for (Value *ArgOperand : Call->args()) { -+ Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF); -+ NewTys.push_back(Ty); -+ NewArgs.push_back(UndefValue::get(Ty)); - } -+ -+ // Construct legal vector function -+ const VFShape Shape = -+ VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/); -+ Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape); -+ assert(LegalVectorF != nullptr && "Can't create legal vector function."); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump()); -+ -+ SmallVector OpBundles; -+ Call->getOperandBundlesAsDefs(OpBundles); -+ auto LegalV = std::unique_ptr(CallInst::Create(LegalVectorF, NewArgs, OpBundles)); -+ -+ if (isa(LegalV)) -+ LegalV->copyFastMathFlags(Call); -+ -+ const DataLayout &DL = VecCall->getModule()->getDataLayout(); -+ // Set SVML calling conventions -+ setVectorFunctionCallingConv(*LegalV, DL, *TLI); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump()); -+ -+ Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue()); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump()); -+ -+ // Remove the illegal call from Builder -+ VecCall->eraseFromParent(); -+ -+ return LegalizedCall; -+} -+ -+ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) { -+ const DataLayout DL = CI->getModule()->getDataLayout(); -+ FunctionType *CallFT = CI->getFunctionType(); -+ // All functions that need legalization should have a vector return type. -+ // This is true for all SVML functions that are currently supported. -+ assert(isa(CallFT->getReturnType()) && -+ "Return type of call that needs legalization is not a vector."); -+ auto *VecCallRetType = cast(CallFT->getReturnType()); -+ Type *ElemType = VecCallRetType->getElementType(); -+ -+ unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType); -+ unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); -+ unsigned LegalVF = VectorBitWidth / TypeBitWidth; -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth -+ << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n"); -+ -+ return ElementCount::getFixed(LegalVF); -+} -+ -+// Partial vectorization of a call instruction is achieved by making clones of -+// \p LegalCall and overwriting its argument operands with shufflevector -+// equivalent decided based on \p LegalVF and current Part being filled. 
-+Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call, -+ CallInst *LegalCall, -+ unsigned LegalVF) { -+ unsigned NumParts = VF.getKnownMinValue() / LegalVF; -+ LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n"); -+ SmallVector CallResults; -+ -+ for (unsigned Part = 0; Part < NumParts; ++Part) { -+ auto *ClonedCall = cast(LegalCall->clone()); -+ -+ // Update the arg operand of cloned call to shufflevector -+ for (unsigned i = 0, ie = Call->arg_size(); i != ie; ++i) { -+ auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part); -+ ClonedCall->setArgOperand(i, NewOp); -+ } -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump()); -+ -+ auto *PartialVecCall = Builder.Insert(ClonedCall); -+ CallResults.push_back(PartialVecCall); -+ } -+ -+ return combinePartialVecCalls(CallResults); -+} -+ -+Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF, -+ unsigned Part) { -+ // Example: -+ // Consider the following vector code - -+ // %1 = sitofp <4 x i32> %0 to <4 x double> -+ // %2 = call <4 x double> @__svml_sin4(<4 x double> %1) -+ // -+ // If the LegalVF is 2, we partially vectorize the sin4 call by invoking -+ // generateShuffleValue on the operand %1 -+ // If Part = 1, output value is - -+ // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ // and if Part = 2, output is - -+ // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ -+ assert(isa(V->getType()) && -+ "Cannot generate shuffles for non-vector values."); -+ SmallVector ShuffleMask; -+ Value *Undef = UndefValue::get(V->getType()); -+ -+ unsigned ElemIdx = Part * LegalVF; -+ -+ for (unsigned K = 0; K < LegalVF; K++) -+ ShuffleMask.push_back(static_cast(ElemIdx + K)); -+ -+ auto *ShuffleInst = -+ Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle"); -+ -+ return ShuffleInst; -+} -+ -+// Results of the calls executed by smaller legal call instructions must be -+// combined to match the original VF for later use. This is done by constructing -+// shufflevector instructions in a cumulative fashion. 
-+Value *InnerLoopVectorizer::combinePartialVecCalls( -+ SmallVectorImpl &CallResults) { -+ assert(isa(CallResults[0]->getType()) && -+ "Cannot combine calls with non-vector results."); -+ auto *CallType = cast(CallResults[0]->getType()); -+ -+ Value *CombinedShuffle; -+ unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2; -+ unsigned NumRegs = CallResults.size(); -+ -+ assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) && -+ "Number of partial vector calls to combine must be a power of 2 " -+ "(atleast 2^1)"); -+ -+ while (NumRegs > 1) { -+ for (unsigned I = 0; I < NumRegs; I += 2) { -+ SmallVector ShuffleMask; -+ for (unsigned J = 0; J < NumElems; J++) -+ ShuffleMask.push_back(static_cast(J)); -+ -+ CombinedShuffle = Builder.CreateShuffleVector( -+ CallResults[I], CallResults[I + 1], ShuffleMask, "combined"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:"; -+ CombinedShuffle->dump()); -+ CallResults.push_back(CombinedShuffle); -+ } -+ -+ SmallVector::iterator Start = CallResults.begin(); -+ SmallVector::iterator End = Start + NumRegs; -+ CallResults.erase(Start, End); -+ -+ NumElems *= 2; -+ NumRegs /= 2; -+ } -+ -+ return CombinedShuffle; - } - - void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { -diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -index 644372483edde..342f018b92184 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -@@ -6322,6 +6322,17 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - return Vec; - } - -+static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL, -+ const TargetLibraryInfo &TLI) { -+ Function *VectorF = CI.getCalledFunction(); -+ FunctionType *FTy = VectorF->getFunctionType(); -+ StringRef VFName = VectorF->getName(); -+ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL); -+ if (CC) { -+ CI.setCallingConv(*CC); -+ } -+} -+ - Value *BoUpSLP::vectorizeTree(TreeEntry *E) { - IRBuilder<>::InsertPointGuard Guard(Builder); - -@@ -6794,7 +6805,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { - - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); -- Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); -+ -+ CallInst *NewCall = Builder.CreateCall(CF, OpVecs, OpBundles); -+ const DataLayout &DL = NewCall->getModule()->getDataLayout(); -+ setVectorFunctionCallingConv(*NewCall, DL, *TLI); -+ -+ Value *V = NewCall; - - // The scalar argument uses an in-tree scalar so we add the new vectorized - // call to ExternalUses list to make sure that an extract will be -diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -index df8b7c498bd00..63a36549f18fd 100644 ---- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -+++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" - define <4 x double> @exp_v4(<4 x double> %in) { - ; SVML-LABEL: define {{[^@]+}}@exp_v4 - ; SVML-SAME: (<4 x double> [[IN:%.*]]) { --; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]]) -+; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]]) - ; SVML-NEXT: ret <4 x double> [[TMP1]] - ; - ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4 -@@ -37,7 +37,7 @@ declare <4 x double> 
@llvm.exp.v4f64(<4 x double>) #0 - define <4 x float> @exp_f32(<4 x float> %in) { - ; SVML-LABEL: define {{[^@]+}}@exp_f32 - ; SVML-SAME: (<4 x float> [[IN:%.*]]) { --; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]]) -+; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]]) - ; SVML-NEXT: ret <4 x float> [[TMP1]] - ; - ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32 -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -index a6e191c3d6923..d6e2e11106949 100644 ---- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -@@ -39,7 +39,8 @@ for.end: ; preds = %for.body - declare double @__exp_finite(double) #0 - - ; CHECK-LABEL: @exp_f64 --; CHECK: <4 x double> @__svml_exp4 -+; CHECK: <2 x double> @__svml_exp2 -+; CHECK: <2 x double> @__svml_exp2 - ; CHECK: ret - define void @exp_f64(double* nocapture %varray) { - entry: -@@ -99,7 +100,8 @@ for.end: ; preds = %for.body - declare double @__log_finite(double) #0 - - ; CHECK-LABEL: @log_f64 --; CHECK: <4 x double> @__svml_log4 -+; CHECK: <2 x double> @__svml_log2 -+; CHECK: <2 x double> @__svml_log2 - ; CHECK: ret - define void @log_f64(double* nocapture %varray) { - entry: -@@ -159,7 +161,8 @@ for.end: ; preds = %for.body - declare double @__pow_finite(double, double) #0 - - ; CHECK-LABEL: @pow_f64 --; CHECK: <4 x double> @__svml_pow4 -+; CHECK: <2 x double> @__svml_pow2 -+; CHECK: <2 x double> @__svml_pow2 - ; CHECK: ret - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - entry: -@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0 - - define void @exp2f_finite(float* nocapture %varray) { - ; CHECK-LABEL: @exp2f_finite( --; CHECK: call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}}) -+; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) -+; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) - ; CHECK: ret void - ; - entry: -@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0 - - define void @exp2_finite(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_finite( --; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) -+; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) -+; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) - ; CHECK: ret void - ; - entry: -@@ -276,7 +281,8 @@ for.end: ; preds = %for.body - declare double @__log2_finite(double) #0 - - ; CHECK-LABEL: @log2_f64 --; CHECK: <4 x double> @__svml_log24 -+; CHECK: <2 x double> @__svml_log22 -+; CHECK: <2 x double> @__svml_log22 - ; CHECK: ret - define void @log2_f64(double* nocapture %varray) { - entry: -@@ -333,7 +339,8 @@ for.end: ; preds = %for.body - declare double @__log10_finite(double) #0 - - ; CHECK-LABEL: @log10_f64 --; CHECK: <4 x double> @__svml_log104 -+; CHECK: <2 x double> @__svml_log102 -+; CHECK: <2 x double> @__svml_log102 - ; CHECK: ret - define void @log10_f64(double* nocapture %varray) { - entry: -@@ -390,7 +397,8 @@ for.end: ; preds = %for.body - declare double @__sqrt_finite(double) #0 - - ; CHECK-LABEL: @sqrt_f64 --; CHECK: <4 x double> @__svml_sqrt4 -+; CHECK: <2 x double> @__svml_sqrt2 -+; CHECK: <2 x double> @__svml_sqrt2 - ; CHECK: ret - define void @sqrt_f64(double* nocapture %varray) { - entry: -diff --git 
a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -index 42c280df6ad02..088bbdcf1aa4a 100644 ---- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0 - - define void @sin_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -71,7 +71,7 @@ for.end: - - define void @sin_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -94,7 +94,7 @@ for.end: - - define void @sin_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -117,7 +117,7 @@ for.end: - - define void @sin_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -140,7 +140,7 @@ for.end: - - define void @cos_f64(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -163,7 +163,7 @@ for.end: - - define void @cos_f32(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -186,7 +186,7 @@ for.end: - - define void @cos_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -209,7 +209,7 @@ for.end: - - define void @cos_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -232,7 +232,7 @@ for.end: - - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -257,7 +257,7 @@ for.end: - - define void 
@pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -282,7 +282,7 @@ for.end: - - define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -307,7 +307,7 @@ for.end: - - define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -332,7 +332,7 @@ for.end: - - define void @exp_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -355,7 +355,7 @@ for.end: - - define void @exp_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -378,7 +378,7 @@ for.end: - - define void @exp_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -401,7 +401,7 @@ for.end: - - define void @exp_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -424,7 +424,7 @@ for.end: - - define void @log_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -447,7 +447,7 @@ for.end: - - define void @log_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -470,7 +470,7 @@ for.end: - - define void @log_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x 
double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -493,7 +493,7 @@ for.end: - - define void @log_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -516,7 +516,7 @@ for.end: - - define void @log2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log2_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -539,7 +539,7 @@ for.end: - - define void @log2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log2_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -562,7 +562,7 @@ for.end: - - define void @log2_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log2_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -585,7 +585,7 @@ for.end: - - define void @log2_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log2_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -608,7 +608,7 @@ for.end: - - define void @log10_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log10_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -631,7 +631,7 @@ for.end: - - define void @log10_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log10_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -654,7 +654,7 @@ for.end: - - define void @log10_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log10_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -677,7 +677,7 @@ for.end: - - define void @log10_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log10_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -700,7 +700,7 @@ for.end: - - define void @sqrt_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -723,7 +723,7 @@ 
for.end: - - define void @sqrt_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -746,7 +746,7 @@ for.end: - - define void @exp2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -769,7 +769,7 @@ for.end: - - define void @exp2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp2_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -792,7 +792,7 @@ for.end: - - define void @exp2_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -815,7 +815,7 @@ for.end: - - define void @exp2_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @exp2_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -836,4 +836,44 @@ for.end: - ret void - } - -+; CHECK-LABEL: @atan2_finite -+; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -+; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -+; CHECK: ret -+ -+declare double @__atan2_finite(double, double) local_unnamed_addr #0 -+ -+define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { -+entry: -+ br label %for.cond1.preheader -+ -+for.cond1.preheader: ; preds = %for.inc7, %entry -+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] -+ %0 = trunc i64 %indvars.iv19 to i32 -+ %conv = sitofp i32 %0 to double -+ br label %for.body3 -+ -+for.body3: ; preds = %for.body3, %for.cond1.preheader -+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %1 = trunc i64 %indvars.iv.next to i32 -+ %conv4 = sitofp i32 %1 to double -+ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) -+ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv -+ store double %call, double* %arrayidx6, align 8 -+ %exitcond = icmp eq i64 %indvars.iv.next, 100 -+ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 -+ -+for.inc7: ; preds = %for.body3 -+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 -+ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 -+ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader -+ -+for.end9: ; preds = %for.inc7 -+ ret void -+} -+ - attributes #0 = { nounwind readnone } -+!5 = distinct !{!5, !6, !7} -+!6 = !{!"llvm.loop.vectorize.width", i32 8} -+!7 = !{!"llvm.loop.vectorize.enable", i1 true} -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -new 
file mode 100644 -index 0000000000000..326c763994343 ---- /dev/null -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -@@ -0,0 +1,513 @@ -+; Check legalization of SVML calls, including intrinsic versions (like @llvm..). -+ -+; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s -+ -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+declare double @sin(double) #0 -+declare float @sinf(float) #0 -+declare double @llvm.sin.f64(double) #0 -+declare float @llvm.sin.f32(float) #0 -+ -+declare double @cos(double) #0 -+declare float @cosf(float) #0 -+declare double @llvm.cos.f64(double) #0 -+declare float @llvm.cos.f32(float) #0 -+ -+declare double @pow(double, double) #0 -+declare float @powf(float, float) #0 -+declare double @llvm.pow.f64(double, double) #0 -+declare float @llvm.pow.f32(float, float) #0 -+ -+declare double @exp(double) #0 -+declare float @expf(float) #0 -+declare double @llvm.exp.f64(double) #0 -+declare float @llvm.exp.f32(float) #0 -+ -+declare double @log(double) #0 -+declare float @logf(float) #0 -+declare double @llvm.log.f64(double) #0 -+declare float @llvm.log.f32(float) #0 -+ -+ -+define void @sin_f64(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @sin(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @sinf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.sin.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 
%iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.sin.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f64(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @cos(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @cosf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.cos.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp 
i32 %tmp to float -+ %call = tail call float @llvm.cos.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -+; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @pow(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -+; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @powf(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x 
float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @exp(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @expf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.exp.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.exp.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* 
%varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @log(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @logf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.log.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.log.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+attributes #0 = { nounwind readnone } -+ -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll -new file mode 100644 -index 
0000000000000..9422653445dc2 ---- /dev/null -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll -@@ -0,0 +1,61 @@ -+; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype. -+; The C code used to generate this test: -+ -+; #include -+; -+; void foo(double *a, int N){ -+; int i; -+; #pragma clang loop vectorize_width(8) -+; for (i=0;i [[I0:%.*]] to <8 x double> -+; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> -+; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]]) -+; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> -+; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]]) -+; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> -+; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8 -+ -+ -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+; Function Attrs: nounwind uwtable -+define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 { -+entry: -+ %cmp5 = icmp sgt i32 %N, 0 -+ br i1 %cmp5, label %for.body.preheader, label %for.end -+ -+for.body.preheader: ; preds = %entry -+ %wide.trip.count = zext i32 %N to i64 -+ br label %for.body -+ -+for.body: ; preds = %for.body, %for.body.preheader -+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] -+ %0 = trunc i64 %indvars.iv to i32 -+ %conv = sitofp i32 %0 to double -+ %call = tail call fast double @sin(double %conv) #2 -+ %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv -+ store double %call, double* %arrayidx, align 8, !tbaa !2 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count -+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6 -+ -+for.end: ; preds = %for.body, %entry -+ ret void -+} -+ -+; Function Attrs: nounwind -+declare dso_local double @sin(double) local_unnamed_addr #1 -+ -+!2 = !{!3, !3, i64 0} -+!3 = !{!"double", !4, i64 0} -+!4 = !{!"omnipotent char", !5, i64 0} -+!5 = !{!"Simple C/C++ TBAA"} -+!6 = distinct !{!6, !7} -+!7 = !{!"llvm.loop.vectorize.width", i32 8} -diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -index e8c83c4d9bd1f..615fdc29176a2 100644 ---- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -+++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu" - - ; COMMON-LABEL: @llvm.compiler.used = appending global - ; SVML-SAME: [6 x i8*] [ --; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*), --; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*), --; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*), --; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*), --; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*), --; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*) -+; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*), -+; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*), -+; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* 
@__svml_sin8_ha to i8*), -+; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*), -+; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*), -+; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*) - ; MASSV-SAME: [2 x i8*] [ - ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*), - ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*) -@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0 - attributes #0 = { nounwind readnone } - - ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"= --; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2), --; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4), --; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" } -+; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2_ha), -+; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4_ha), -+; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" } - - ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"= - ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" } -diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -index 97df6a55d1b59..199e0285c9e5d 100644 ---- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -+++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM - SearchableTableEmitter.cpp - SubtargetEmitter.cpp - SubtargetFeatureInfo.cpp -+ SVMLEmitter.cpp - TableGen.cpp - Types.cpp - X86DisassemblerTables.cpp -diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp -new file mode 100644 -index 0000000000000..a5aeea48db28b ---- /dev/null -+++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp -@@ -0,0 +1,110 @@ -+//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This tablegen backend emits the scalar to svml function map for TLI. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "CodeGenTarget.h" -+#include "llvm/Support/Format.h" -+#include "llvm/TableGen/Error.h" -+#include "llvm/TableGen/Record.h" -+#include "llvm/TableGen/TableGenBackend.h" -+#include -+#include -+ -+using namespace llvm; -+ -+#define DEBUG_TYPE "SVMLVariants" -+#include "llvm/Support/Debug.h" -+ -+namespace { -+ -+class SVMLVariantsEmitter { -+ -+ RecordKeeper &Records; -+ -+private: -+ void emitSVMLVariants(raw_ostream &OS); -+ -+public: -+ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {} -+ -+ void run(raw_ostream &OS); -+}; -+} // End anonymous namespace -+ -+/// \brief Emit the set of SVML variant function names. -+// The default is to emit the high accuracy SVML variants until a mechanism is -+// introduced to allow a selection of different variants through precision -+// requirements specified by the user. This code generates mappings to svml -+// that are in the scalar form of llvm intrinsics, math library calls, or the -+// finite variants of math library calls. 
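[Editorial aside, not part of the patch: to make the emitter below easier to follow, this is the shape of the fragment it would generate for a hypothetical `SvmlVariant` record named `sin`, single-precision width 4 only, derived directly from the stream writes in `emitSVMLVariants`. The fragment is meant to be textually included by TLI inside a table initializer guarded by `GET_SVML_VARIANTS`, not compiled on its own.]

```cpp
// Hypothetical emitter output for a record named "sin" (VL = 4 only):
#ifdef GET_SVML_VARIANTS
{"sinf", "__svml_sinf4", ElementCount::getFixed(4)},          // scalar libm call
{"llvm.sin.f32", "__svml_sinf4", ElementCount::getFixed(4)},  // scalar intrinsic
{"__sinf_finite", "__svml_sinf4", ElementCount::getFixed(4)}, // finite libm call
#endif // GET_SVML_VARIANTS
```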
-+void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) { -+ -+ const unsigned MinSinglePrecVL = 4; -+ const unsigned MaxSinglePrecVL = 16; -+ const unsigned MinDoublePrecVL = 2; -+ const unsigned MaxDoublePrecVL = 8; -+ -+ OS << "#ifdef GET_SVML_VARIANTS\n"; -+ -+ for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) { -+ StringRef SvmlVariantNameStr = D->getName(); -+ // Single Precision SVML -+ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "f" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ -+ // Emit the finite math library function to svml function entry. -+ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ } -+ -+ // Double Precision SVML -+ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL -+ << ")},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL -+ << ")},\n"; -+ -+ // Emit the finite math library function to svml function entry. 
-+ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ } -+ } -+ -+ OS << "#endif // GET_SVML_VARIANTS\n\n"; -+} -+ -+void SVMLVariantsEmitter::run(raw_ostream &OS) { -+ emitSVMLVariants(OS); -+} -+ -+namespace llvm { -+ -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) { -+ SVMLVariantsEmitter(RK).run(OS); -+} -+ -+} // End llvm namespace -diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -index 2d4a45f889be6..603d0c223b33a 100644 ---- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -+++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -@@ -57,6 +57,7 @@ enum ActionType { - GenAutomata, - GenDirectivesEnumDecl, - GenDirectivesEnumImpl, -+ GenSVMLVariants, - }; - - namespace llvm { -@@ -138,7 +139,9 @@ cl::opt Action( - clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", - "Generate directive related declaration code (header file)"), - clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", -- "Generate directive related implementation code"))); -+ "Generate directive related implementation code"), -+ clEnumValN(GenSVMLVariants, "gen-svml", -+ "Generate SVML variant function names"))); - - cl::OptionCategory PrintEnumsCat("Options for -print-enums"); - cl::opt Class("class", cl::desc("Print Enum list for this class"), -@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { - case GenDirectivesEnumImpl: - EmitDirectivesImpl(Records, OS); - break; -+ case GenSVMLVariants: -+ EmitSVMLVariants(Records, OS); -+ break; - } - - return false; -diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -index 71db8dc77b052..86c3a3068c2dc 100644 ---- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -+++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); - void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); - void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); - void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS); - - } // End llvm namespace - -diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -index 205db16b7d8cd..2572ab5a59e1b 100644 ---- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -+++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -@@ -104,6 +104,7 @@ syn keyword llvmKeyword - \ inreg - \ intel_ocl_bicc - \ inteldialect -+ \ intel_svmlcc - \ internal - \ jumptable - \ linkonce diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat index 4d16d84b1..ec84d9897 100644 --- a/conda-recipes/llvmdev/bld.bat +++ b/conda-recipes/llvmdev/bld.bat @@ -1,94 +1,59 @@ -setlocal EnableDelayedExpansion -FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d -if !errorlevel! neq 0 exit /b %errorlevel%) -FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d -if !errorlevel! neq 0 exit /b %errorlevel%) -FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d -if !errorlevel! 
neq 0 exit /b %errorlevel%) - -DIR +REM base on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/bld.bat +echo on mkdir build cd build -set BUILD_CONFIG=Release - -REM === Configure step === - -REM allow setting the targets to build as an environment variable -if "%LLVM_TARGETS_TO_BUILD%"=="" ( - set "LLVM_TARGETS_TO_BUILD=all" -) -if "%ARCH%"=="32" ( - set "ARCH_POSTFIX=" - set "GEN_ARCH=Win32" -) else ( - set "ARCH_POSTFIX= Win64" - set "GEN_ARCH=x64" -) - -REM The platform toolset host arch is set to x64 so as to use the 64bit linker, -REM the 32bit linker heap is too small for llvm8 so it tries and falls over to -REM the 64bit linker anyway. This must be passed in to certain generators as -REM '-Thost x64'. -set PreferredToolArchitecture=x64 - -set MAX_INDEX_CMAKE_GENERATOR=0 - -set "CMAKE_GENERATOR[0]=Visual Studio 16 2019" +REM remove GL flag for now +set "CXXFLAGS=-MD" +set "CC=cl.exe" +set "CXX=cl.exe" -set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%" - -set "CMAKE_GENERATOR_TOOLSET[0]=v142" - -REM Reduce build times and package size by removing unused stuff -REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015 -set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^ - -DLLVM_ENABLE_PROJECTS:STRING=lld ^ - -DLLVM_ENABLE_ZLIB=OFF ^ - -DLLVM_INCLUDE_UTILS=ON ^ +cmake -G "Ninja" ^ + -DCMAKE_BUILD_TYPE="Release" ^ + -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX% ^ + -DCMAKE_INSTALL_PREFIX:PATH=%LIBRARY_PREFIX% ^ + -DLLVM_USE_INTEL_JITEVENTS=ON ^ + -DLLVM_ENABLE_LIBXML2=FORCE_ON ^ + -DLLVM_ENABLE_RTTI=ON ^ + -DLLVM_ENABLE_ZLIB=FORCE_ON ^ + -DLLVM_ENABLE_ZSTD=FORCE_ON ^ + -DLLVM_INCLUDE_BENCHMARKS=OFF ^ -DLLVM_INCLUDE_DOCS=OFF ^ -DLLVM_INCLUDE_EXAMPLES=OFF ^ + -DLLVM_INCLUDE_TESTS=ON ^ + -DLLVM_INCLUDE_UTILS=ON ^ + -DLLVM_INSTALL_UTILS=ON ^ + -DLLVM_UTILS_INSTALL_DIR=libexec\llvm ^ + -DLLVM_BUILD_LLVM_C_DYLIB=no ^ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly ^ + -DCMAKE_POLICY_DEFAULT_CMP0111=NEW ^ + -DLLVM_ENABLE_PROJECTS:STRING=lld;compiler-rt ^ -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_USE_INTEL_JITEVENTS=ON ^ - -DLLVM_INCLUDE_BENCHMARKS=OFF ^ -DLLVM_ENABLE_DIA_SDK=OFF ^ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly - -REM try all compatible visual studio toolsets to find one that is installed -setlocal enabledelayedexpansion -for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do ( - cmake -G "!CMAKE_GENERATOR[%%n]!" ^ - -A "!CMAKE_GENERATOR_ARCHITECTURE[%%n]!" ^ - -T "!CMAKE_GENERATOR_TOOLSET[%%n]!" ^ - -DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^ - -DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^ - -DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^ - %CMAKE_CUSTOM% "%SRC_DIR%\llvm" - if not errorlevel 1 goto configuration_successful - del CMakeCache.txt -) - -REM no compatible visual studio toolset was found -if errorlevel 1 exit 1 - -:configuration_successful -endlocal - -REM === Build step === -cmake --build . --config "%BUILD_CONFIG%" -if errorlevel 1 exit 1 - -REM === Install step === -cmake --build . --config "%BUILD_CONFIG%" --target install -if errorlevel 1 exit 1 - -REM From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 -"%BUILD_CONFIG%\bin\opt" -S -vector-library=SVML -mcpu=haswell -O3 "%RECIPE_DIR%\numba-3016.ll" | "%BUILD_CONFIG%\bin\FileCheck" "%RECIPE_DIR%\numba-3016.ll" -if errorlevel 1 exit 1 - -REM This is technically how to run the suite, but it will only run in an -REM enhanced unix-like shell which has functions like `grep` available. 
-REM cd ..\test -REM "%PYTHON%" "..\build\%BUILD_CONFIG%\bin\llvm-lit.py" -vv Transforms ExecutionEngine Analysis CodeGen/X86 -REM if errorlevel 1 exit 1 + -DCOMPILER_RT_BUILD_BUILTINS=ON ^ + -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF ^ + -DCOMPILER_RT_BUILD_LIBFUZZER=OFF ^ + -DCOMPILER_RT_BUILD_CRT=OFF ^ + -DCOMPILER_RT_BUILD_MEMPROF=OFF ^ + -DCOMPILER_RT_BUILD_PROFILE=OFF ^ + -DCOMPILER_RT_BUILD_SANITIZERS=OFF ^ + -DCOMPILER_RT_BUILD_XRAY=OFF ^ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF ^ + -DCOMPILER_RT_BUILD_ORC=OFF ^ + -DCOMPILER_RT_INCLUDE_TESTS=OFF ^ + %SRC_DIR%/llvm +if %ERRORLEVEL% neq 0 exit 1 + +cmake --build . +if %ERRORLEVEL% neq 0 exit 1 + +cmake --build . --target install + +if %ERRORLEVEL% neq 0 exit 1 + +REM bin\opt -S -vector-library=SVML -mcpu=haswell -O3 %RECIPE_DIR%\numba-3016.ll | bin\FileCheck %RECIPE_DIR%\numba-3016.ll +REM if %ERRORLEVEL% neq 0 exit 1 + +cd ..\llvm\test +python ..\..\build\bin\llvm-lit.py -vv Transforms ExecutionEngine Analysis CodeGen/X86 diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh index caccfe127..e1adebd59 100644 --- a/conda-recipes/llvmdev/build.sh +++ b/conda-recipes/llvmdev/build.sh @@ -4,97 +4,111 @@ set -x -# allow setting the targets to build as an environment variable -LLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD:-"all"} +# Make osx work like linux. +sed -i.bak "s/NOT APPLE AND ARG_SONAME/ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake +sed -i.bak "s/NOT APPLE AND NOT ARG_SONAME/NOT ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake -# This is the clang compiler prefix -if [[ $build_platform == osx-arm64 ]]; then - DARWIN_TARGET=arm64-apple-darwin20.0.0 -else - DARWIN_TARGET=x86_64-apple-darwin13.4.0 -fi +mkdir build +cd build + +export CPU_COUNT=4 -mv llvm-*.src llvm -mv lld-*.src lld -mv unwind/libunwind-*.src libunwind - -declare -a _cmake_config -_cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX}) -_cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release) -_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld") -# The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here -# _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON) -_cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON) -_cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON) -# Don't really require libxml2. Turn it off explicitly to avoid accidentally linking to system libs -_cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF) -# Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to. -_cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF) -_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF) -# Sometimes these are reported as unused. Whatever. -_cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF) -_cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF) -_cmake_config+=(-DHAVE_TERMINFO_TERMINFO=OFF) -_cmake_config+=(-DHAVE_TERMINFO_TINFO=OFF) -_cmake_config+=(-DHAVE_TERMIOS_H=OFF) -_cmake_config+=(-DCLANG_ENABLE_LIBXML=OFF) -_cmake_config+=(-DLIBOMP_INSTALL_ALIASES=OFF) -_cmake_config+=(-DLLVM_ENABLE_RTTI=OFF) -_cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}) -_cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly) -_cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit -_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project -# TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to -# disable these unnecessary but useful things. 
-if [[ ${CONDA_FORGE} == yes ]]; then - _cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF) - _cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF) +CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" + +if [[ "$target_platform" == "linux-64" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_USE_INTEL_JITEVENTS=ON" fi -# Only valid when using the Ninja Generator AFAICT -# _cmake_config+=(-DLLVM_PARALLEL_LINK_JOBS:STRING=1) -# What about cross-compiling targetting Darwin here? Are any of these needed? -if [[ $(uname) == Darwin ]]; then - _cmake_config+=(-DCMAKE_OSX_SYSROOT=${SYSROOT_DIR}) - _cmake_config+=(-DDARWIN_macosx_CACHED_SYSROOT=${SYSROOT_DIR}) - _cmake_config+=(-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET}) - _cmake_config+=(-DCMAKE_LIBTOOL=$(which ${DARWIN_TARGET}-libtool)) - _cmake_config+=(-DLD64_EXECUTABLE=$(which ${DARWIN_TARGET}-ld)) - _cmake_config+=(-DCMAKE_INSTALL_NAME_TOOL=$(which ${DARWIN_TARGET}-install_name_tool)) - # Once we are using our libc++ (not until llvm_build_final), it will be single-arch only and not setting - # this causes link failures building the santizers since they respect DARWIN_osx_ARCHS. We may as well - # save some compilation time by setting this for all of our llvm builds. - _cmake_config+=(-DDARWIN_osx_ARCHS=x86_64) -elif [[ $(uname) == Linux ]]; then - _cmake_config+=(-DLLVM_USE_INTEL_JITEVENTS=ON) -# _cmake_config+=(-DLLVM_BINUTILS_INCDIR=${PREFIX}/lib/gcc/${cpu_arch}-${vendor}-linux-gnu/${compiler_ver}/plugin/include) + +if [[ "$CC_FOR_BUILD" != "" && "$CC_FOR_BUILD" != "$CC" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_C_COMPILER=$CC_FOR_BUILD;-DCMAKE_CXX_COMPILER=$CXX_FOR_BUILD;-DCMAKE_C_FLAGS=-O2;-DCMAKE_CXX_FLAGS=-O2;-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,${BUILD_PREFIX}/lib;-DCMAKE_MODULE_LINKER_FLAGS=;-DCMAKE_SHARED_LINKER_FLAGS=;-DCMAKE_STATIC_LINKER_FLAGS=;-DLLVM_INCLUDE_BENCHMARKS=OFF;" + CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_HOST_TRIPLE=$(echo $HOST | sed s/conda/unknown/g) -DLLVM_DEFAULT_TARGET_TRIPLE=$(echo $HOST | sed s/conda/unknown/g)" fi -# For when the going gets tough: -# _cmake_config+=(-Wdev) -# _cmake_config+=(--debug-output) -# _cmake_config+=(--trace-expand) -# CPU_COUNT=1 +# disable -fno-plt due to https://bugs.llvm.org/show_bug.cgi?id=51863 due to some GCC bug +if [[ "$target_platform" == "linux-ppc64le" ]]; then + CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" + CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" + CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" + CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" +fi -mkdir build -cd build +if [[ $target_platform == osx-arm64 ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_ENABLE_WERROR=FALSE" +fi -cmake -G'Unix Makefiles' \ - "${_cmake_config[@]}" \ +cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_LIBRARY_PATH="${PREFIX}" \ + -DLLVM_ENABLE_LIBEDIT=OFF \ + -DLLVM_ENABLE_LIBXML2=OFF \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_TERMINFO=OFF \ + -DLLVM_INCLUDE_BENCHMARKS=OFF \ + -DLLVM_INCLUDE_DOCS=OFF \ + -DLLVM_INCLUDE_EXAMPLES=OFF \ + -DLLVM_INCLUDE_GO_TESTS=OFF \ + -DLLVM_INCLUDE_TESTS=ON \ + -DLLVM_INCLUDE_UTILS=ON \ + -DLLVM_INSTALL_UTILS=ON \ + -DLLVM_UTILS_INSTALL_DIR=libexec/llvm \ + -DLLVM_BUILD_LLVM_DYLIB=OFF \ + -DLLVM_LINK_LLVM_DYLIB=OFF \ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_ENABLE_Z3_SOLVER=OFF \ + -DLLVM_OPTIMIZED_TABLEGEN=ON \ + -DCMAKE_POLICY_DEFAULT_CMP0111=NEW \ + -DCOMPILER_RT_BUILD_BUILTINS=ON \ + 
-DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \ + -DCOMPILER_RT_BUILD_LIBFUZZER=OFF \ + -DCOMPILER_RT_BUILD_CRT=OFF \ + -DCOMPILER_RT_BUILD_MEMPROF=OFF \ + -DCOMPILER_RT_BUILD_PROFILE=OFF \ + -DCOMPILER_RT_BUILD_SANITIZERS=OFF \ + -DCOMPILER_RT_BUILD_XRAY=OFF \ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ + -DCOMPILER_RT_BUILD_ORC=OFF \ + -DCOMPILER_RT_INCLUDE_TESTS=OFF \ + ${CMAKE_ARGS} \ + -GNinja \ ../llvm -ARCH=`uname -m` -if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling - make -j2 VERBOSE=1 + +ninja -j${CPU_COUNT} + +ninja install + +if [[ "${target_platform}" == "linux-64" || "${target_platform}" == "osx-64" ]]; then + export TEST_CPU_FLAG="-mcpu=haswell" else - make -j${CPU_COUNT} VERBOSE=1 + export TEST_CPU_FLAG="" fi -make check-llvm-unit || exit $? +if [[ "$CONDA_BUILD_CROSS_COMPILATION" != "1" ]]; then -# From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 -make install || exit $? + echo "Testing on ${target_platform}" + # bin/opt -S -vector-library=SVML $TEST_CPU_FLAG -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? -# SVML tests on x86_64 arch only -if [[ $ARCH == 'x86_64' ]]; then - bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? + if [[ "$target_platform" == linux* ]]; then + ln -s $(which $CC) $BUILD_PREFIX/bin/gcc + + # These tests tests permission-based behaviour and probably fail because of some + # filesystem-related reason. They are sporadic failures and don't seem serious so they're excluded. + # Note that indents would introduce spaces into the environment variable + export LIT_FILTER_OUT='tools/llvm-ar/error-opening-permission.test|'\ +'tools/llvm-dwarfdump/X86/output.s|'\ +'tools/llvm-ifs/fail-file-write.test|'\ +'tools/llvm-ranlib/error-opening-permission.test' + fi + + if [[ "$target_platform" == osx-* ]]; then + # This failure seems like something to do with the output format of ls -lu + # and looks harmless + export LIT_FILTER_OUT='tools/llvm-objcopy/ELF/strip-preserve-atime.test|'\ +'ExecutionEngine/Interpreter/intrinsics.ll' + fi + + cd ../llvm/test + ${PYTHON} ../../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86 fi + diff --git a/conda-recipes/llvmdev/conda_build_config.yaml b/conda-recipes/llvmdev/conda_build_config.yaml index 81b7d08c3..1ae35296b 100644 --- a/conda-recipes/llvmdev/conda_build_config.yaml +++ b/conda-recipes/llvmdev/conda_build_config.yaml @@ -10,3 +10,11 @@ cxx_compiler_version: # [linux] fortran_compiler_version: # [linux] - 7 # [linux and (x86_64 or ppc64le)] - 9 # [linux and aarch64] + +c_compiler: # [win] + - vs2019 # [win] +cxx_compiler: # [win] + - vs2019 # [win] + +MACOSX_SDK_VERSION: # [osx and x86_64] + - 10.12 # [osx and x86_64] diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml index 7676d234e..32d15a790 100644 --- a/conda-recipes/llvmdev/meta.yaml +++ b/conda-recipes/llvmdev/meta.yaml @@ -1,30 +1,27 @@ -{% set shortversion = "14.0" %} -{% set version = "14.0.6" %} -{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %} -{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %} -{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %} -{% set build_number = "3" %} +{% set shortversion = "15.0" %} +{% set version = "15.0.7" %} +{% set sha256_llvm = "8b5fcb24b4128cf04df1b0b9410ce8b1a729cb3c544e6da885d234280dedeac6" %} +{% set build_number = "1" %} 
package: name: llvmdev version: {{ version }} source: - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/llvm-{{ version }}.src.tar.xz - fn: llvm-{{ version }}.src.tar.xz + - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version.replace(".rc", "-rc") }}/llvm-project-{{ version.replace(".rc", "rc") }}.src.tar.xz sha256: {{ sha256_llvm }} patches: - - ../llvm14-clear-gotoffsetmap.patch - - ../llvm14-remove-use-of-clonefile.patch - - ../llvm14-svml.patch - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz - fn: lld-{{ version }}.src.tar.xz - sha256: {{ sha256_lld }} + - ../llvm15-clear-gotoffsetmap.patch + - ../llvm15-remove-use-of-clonefile.patch + - ../llvm15-svml.patch + - ../compiler-rt-cfi-startproc-war.patch + - ../compiler-rt-macos-build.patch - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz - fn: libunwind-{{ version }}.src.tar.xz - sha256: {{ sha256_libunwind }} - folder: unwind + # Patches from conda-forge needed for windows to build + # backport of zlib patches, can be dropped for vs15.0.3, see + # https://reviews.llvm.org/D135457 & https://reviews.llvm.org/D136065 + - patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch + - patches/no-windows-symlinks.patch build: number: {{ build_number }} @@ -36,25 +33,22 @@ build: requirements: build: - # We cannot do this on macOS or windows - # OSX already has llvm so has to be handled - # at build.sh time - # Windows needs to build using vs2015_runtime - # irrespective of python version - - {{ compiler('c') }} # [unix and not (armv6l or armv7l)] - - {{ compiler('cxx') }} # [unix and not (armv6l or armv7l)] + - {{ compiler('cxx') }} - cmake - - make # [unix and not (armv6l or armv7l or aarch64)] - # Needed to unpack the source tarball - - m2w64-xz # [win] - # Needed to build LLVM + - ninja - python >=3 + - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] + - patch # [not win] + - m2-patch # [win] + - git # [(linux and x86_64)] + host: - # needed for llc at runtime - - zlib # [not win] - - xar # [osx and x86_64] - # llvm-lit testing needs *a* python - - python # [not (armv6l or armv7l or aarch64 or win)] + - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] + - libffi # [unix] + # libxml2 supports a windows-only feature, see https://github.com/llvm/llvm-project/blob/llvmorg-17.0.6/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h + - libxml2 # [win] + - zlib + - zstd test: files: diff --git a/conda-recipes/llvmdev_llvm15/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch b/conda-recipes/llvmdev/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch similarity index 100% rename from conda-recipes/llvmdev_llvm15/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch rename to conda-recipes/llvmdev/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch diff --git a/conda-recipes/llvmdev_llvm15/patches/no-windows-symlinks.patch b/conda-recipes/llvmdev/patches/no-windows-symlinks.patch similarity index 100% rename from conda-recipes/llvmdev_llvm15/patches/no-windows-symlinks.patch rename to conda-recipes/llvmdev/patches/no-windows-symlinks.patch diff --git a/conda-recipes/llvmdev_llvm15/bld.bat b/conda-recipes/llvmdev_llvm15/bld.bat deleted file mode 100644 index ec84d9897..000000000 --- a/conda-recipes/llvmdev_llvm15/bld.bat +++ /dev/null @@ 
-1,59 +0,0 @@ -REM base on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/bld.bat -echo on - -mkdir build -cd build - -REM remove GL flag for now -set "CXXFLAGS=-MD" -set "CC=cl.exe" -set "CXX=cl.exe" - -cmake -G "Ninja" ^ - -DCMAKE_BUILD_TYPE="Release" ^ - -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX% ^ - -DCMAKE_INSTALL_PREFIX:PATH=%LIBRARY_PREFIX% ^ - -DLLVM_USE_INTEL_JITEVENTS=ON ^ - -DLLVM_ENABLE_LIBXML2=FORCE_ON ^ - -DLLVM_ENABLE_RTTI=ON ^ - -DLLVM_ENABLE_ZLIB=FORCE_ON ^ - -DLLVM_ENABLE_ZSTD=FORCE_ON ^ - -DLLVM_INCLUDE_BENCHMARKS=OFF ^ - -DLLVM_INCLUDE_DOCS=OFF ^ - -DLLVM_INCLUDE_EXAMPLES=OFF ^ - -DLLVM_INCLUDE_TESTS=ON ^ - -DLLVM_INCLUDE_UTILS=ON ^ - -DLLVM_INSTALL_UTILS=ON ^ - -DLLVM_UTILS_INSTALL_DIR=libexec\llvm ^ - -DLLVM_BUILD_LLVM_C_DYLIB=no ^ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly ^ - -DCMAKE_POLICY_DEFAULT_CMP0111=NEW ^ - -DLLVM_ENABLE_PROJECTS:STRING=lld;compiler-rt ^ - -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_ENABLE_DIA_SDK=OFF ^ - -DCOMPILER_RT_BUILD_BUILTINS=ON ^ - -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF ^ - -DCOMPILER_RT_BUILD_LIBFUZZER=OFF ^ - -DCOMPILER_RT_BUILD_CRT=OFF ^ - -DCOMPILER_RT_BUILD_MEMPROF=OFF ^ - -DCOMPILER_RT_BUILD_PROFILE=OFF ^ - -DCOMPILER_RT_BUILD_SANITIZERS=OFF ^ - -DCOMPILER_RT_BUILD_XRAY=OFF ^ - -DCOMPILER_RT_BUILD_GWP_ASAN=OFF ^ - -DCOMPILER_RT_BUILD_ORC=OFF ^ - -DCOMPILER_RT_INCLUDE_TESTS=OFF ^ - %SRC_DIR%/llvm -if %ERRORLEVEL% neq 0 exit 1 - -cmake --build . -if %ERRORLEVEL% neq 0 exit 1 - -cmake --build . --target install - -if %ERRORLEVEL% neq 0 exit 1 - -REM bin\opt -S -vector-library=SVML -mcpu=haswell -O3 %RECIPE_DIR%\numba-3016.ll | bin\FileCheck %RECIPE_DIR%\numba-3016.ll -REM if %ERRORLEVEL% neq 0 exit 1 - -cd ..\llvm\test -python ..\..\build\bin\llvm-lit.py -vv Transforms ExecutionEngine Analysis CodeGen/X86 diff --git a/conda-recipes/llvmdev_llvm15/build.sh b/conda-recipes/llvmdev_llvm15/build.sh deleted file mode 100644 index e1adebd59..000000000 --- a/conda-recipes/llvmdev_llvm15/build.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash - -# based on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/build.sh - -set -x - -# Make osx work like linux. 
-sed -i.bak "s/NOT APPLE AND ARG_SONAME/ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake -sed -i.bak "s/NOT APPLE AND NOT ARG_SONAME/NOT ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake - -mkdir build -cd build - -export CPU_COUNT=4 - -CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" - -if [[ "$target_platform" == "linux-64" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_USE_INTEL_JITEVENTS=ON" -fi - -if [[ "$CC_FOR_BUILD" != "" && "$CC_FOR_BUILD" != "$CC" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_C_COMPILER=$CC_FOR_BUILD;-DCMAKE_CXX_COMPILER=$CXX_FOR_BUILD;-DCMAKE_C_FLAGS=-O2;-DCMAKE_CXX_FLAGS=-O2;-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,${BUILD_PREFIX}/lib;-DCMAKE_MODULE_LINKER_FLAGS=;-DCMAKE_SHARED_LINKER_FLAGS=;-DCMAKE_STATIC_LINKER_FLAGS=;-DLLVM_INCLUDE_BENCHMARKS=OFF;" - CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_HOST_TRIPLE=$(echo $HOST | sed s/conda/unknown/g) -DLLVM_DEFAULT_TARGET_TRIPLE=$(echo $HOST | sed s/conda/unknown/g)" -fi - -# disable -fno-plt due to https://bugs.llvm.org/show_bug.cgi?id=51863 due to some GCC bug -if [[ "$target_platform" == "linux-ppc64le" ]]; then - CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" - CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" - CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" - CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" -fi - -if [[ $target_platform == osx-arm64 ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_ENABLE_WERROR=FALSE" -fi - -cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_LIBRARY_PATH="${PREFIX}" \ - -DLLVM_ENABLE_LIBEDIT=OFF \ - -DLLVM_ENABLE_LIBXML2=OFF \ - -DLLVM_ENABLE_RTTI=ON \ - -DLLVM_ENABLE_TERMINFO=OFF \ - -DLLVM_INCLUDE_BENCHMARKS=OFF \ - -DLLVM_INCLUDE_DOCS=OFF \ - -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DLLVM_INCLUDE_GO_TESTS=OFF \ - -DLLVM_INCLUDE_TESTS=ON \ - -DLLVM_INCLUDE_UTILS=ON \ - -DLLVM_INSTALL_UTILS=ON \ - -DLLVM_UTILS_INSTALL_DIR=libexec/llvm \ - -DLLVM_BUILD_LLVM_DYLIB=OFF \ - -DLLVM_LINK_LLVM_DYLIB=OFF \ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \ - -DLLVM_ENABLE_FFI=ON \ - -DLLVM_ENABLE_Z3_SOLVER=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN=ON \ - -DCMAKE_POLICY_DEFAULT_CMP0111=NEW \ - -DCOMPILER_RT_BUILD_BUILTINS=ON \ - -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \ - -DCOMPILER_RT_BUILD_LIBFUZZER=OFF \ - -DCOMPILER_RT_BUILD_CRT=OFF \ - -DCOMPILER_RT_BUILD_MEMPROF=OFF \ - -DCOMPILER_RT_BUILD_PROFILE=OFF \ - -DCOMPILER_RT_BUILD_SANITIZERS=OFF \ - -DCOMPILER_RT_BUILD_XRAY=OFF \ - -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ - -DCOMPILER_RT_BUILD_ORC=OFF \ - -DCOMPILER_RT_INCLUDE_TESTS=OFF \ - ${CMAKE_ARGS} \ - -GNinja \ - ../llvm - - -ninja -j${CPU_COUNT} - -ninja install - -if [[ "${target_platform}" == "linux-64" || "${target_platform}" == "osx-64" ]]; then - export TEST_CPU_FLAG="-mcpu=haswell" -else - export TEST_CPU_FLAG="" -fi - -if [[ "$CONDA_BUILD_CROSS_COMPILATION" != "1" ]]; then - - echo "Testing on ${target_platform}" - # bin/opt -S -vector-library=SVML $TEST_CPU_FLAG -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? - - if [[ "$target_platform" == linux* ]]; then - ln -s $(which $CC) $BUILD_PREFIX/bin/gcc - - # These tests tests permission-based behaviour and probably fail because of some - # filesystem-related reason. They are sporadic failures and don't seem serious so they're excluded. 
- # Note that indents would introduce spaces into the environment variable - export LIT_FILTER_OUT='tools/llvm-ar/error-opening-permission.test|'\ -'tools/llvm-dwarfdump/X86/output.s|'\ -'tools/llvm-ifs/fail-file-write.test|'\ -'tools/llvm-ranlib/error-opening-permission.test' - fi - - if [[ "$target_platform" == osx-* ]]; then - # This failure seems like something to do with the output format of ls -lu - # and looks harmless - export LIT_FILTER_OUT='tools/llvm-objcopy/ELF/strip-preserve-atime.test|'\ -'ExecutionEngine/Interpreter/intrinsics.ll' - fi - - cd ../llvm/test - ${PYTHON} ../../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86 -fi - diff --git a/conda-recipes/llvmdev_llvm15/conda_build_config.yaml b/conda-recipes/llvmdev_llvm15/conda_build_config.yaml deleted file mode 100644 index 1ae35296b..000000000 --- a/conda-recipes/llvmdev_llvm15/conda_build_config.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Numba/llvmlite stack needs an older compiler for backwards compatability. -c_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -cxx_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -fortran_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -c_compiler: # [win] - - vs2019 # [win] -cxx_compiler: # [win] - - vs2019 # [win] - -MACOSX_SDK_VERSION: # [osx and x86_64] - - 10.12 # [osx and x86_64] diff --git a/conda-recipes/llvmdev_llvm15/meta.yaml b/conda-recipes/llvmdev_llvm15/meta.yaml deleted file mode 100644 index 32d15a790..000000000 --- a/conda-recipes/llvmdev_llvm15/meta.yaml +++ /dev/null @@ -1,77 +0,0 @@ -{% set shortversion = "15.0" %} -{% set version = "15.0.7" %} -{% set sha256_llvm = "8b5fcb24b4128cf04df1b0b9410ce8b1a729cb3c544e6da885d234280dedeac6" %} -{% set build_number = "1" %} - -package: - name: llvmdev - version: {{ version }} - -source: - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version.replace(".rc", "-rc") }}/llvm-project-{{ version.replace(".rc", "rc") }}.src.tar.xz - sha256: {{ sha256_llvm }} - patches: - - ../llvm15-clear-gotoffsetmap.patch - - ../llvm15-remove-use-of-clonefile.patch - - ../llvm15-svml.patch - - ../compiler-rt-cfi-startproc-war.patch - - ../compiler-rt-macos-build.patch - - # Patches from conda-forge needed for windows to build - # backport of zlib patches, can be dropped for vs15.0.3, see - # https://reviews.llvm.org/D135457 & https://reviews.llvm.org/D136065 - - patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch - - patches/no-windows-symlinks.patch - -build: - number: {{ build_number }} - script_env: - - PY_VCRUNTIME_REDIST - ignore_run_exports: - # Is static-linked - - xar - -requirements: - build: - - {{ compiler('cxx') }} - - cmake - - ninja - - python >=3 - - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] - - patch # [not win] - - m2-patch # [win] - - git # [(linux and x86_64)] - - host: - - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] - - libffi # [unix] - # libxml2 supports a windows-only feature, see https://github.com/llvm/llvm-project/blob/llvmorg-17.0.6/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h - - libxml2 # [win] - - zlib - - zstd - -test: - files: - - numba-3016.ll - commands: - - $PREFIX/bin/llvm-config --libs # [not win] - - $PREFIX/bin/llc -version # [not win] - - - if not exist %LIBRARY_INC%\\llvm\\Pass.h exit 1 # [win] - - if not exist %LIBRARY_LIB%\\LLVMSupport.lib exit 1 # 
[win] - - - test -f $PREFIX/include/llvm/Pass.h # [unix] - - test -f $PREFIX/lib/libLLVMSupport.a # [unix] - - - test -f $PREFIX/lib/libLLVMCore.a # [not win] - - # LLD tests - - ld.lld --version # [unix] - - lld-link /? # [win] - -about: - home: http://llvm.org/ - dev_url: https://github.com/llvm-mirror/llvm - license: NCSA - license_file: llvm/LICENSE.TXT - summary: Development headers and libraries for LLVM diff --git a/conda-recipes/llvmdev_llvm15/numba-3016.ll b/conda-recipes/llvmdev_llvm15/numba-3016.ll deleted file mode 100644 index 1a9b3ecf8..000000000 --- a/conda-recipes/llvmdev_llvm15/numba-3016.ll +++ /dev/null @@ -1,80 +0,0 @@ -; Regression test for llvmdev-feedstock#52 and numba#3016 - -; Generated from C code: int a[1<<10],b[1<<10]; void foo() { int i=0; for(i=0; i<1<<10; i++) { b[i]=sin(a[i]); }} -; compiled: -fvectorize -fveclib=SVML -O -S -mavx -mllvm -disable-llvm-optzns -emit-llvm - -; RUN: opt -vector-library=SVML -mcpu=haswell -O3 -S < %s | FileCheck %s -; CHECK: call {{.*}}__svml_sin4_ha( -; CHECK-NOT: call {{.*}}__svml_sin4( -; CHECK-NOT: call {{.*}}__svml_sin8 - -source_filename = "svml-3016.c" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - -@a = common dso_local global [1024 x i32] zeroinitializer, align 16 -@b = common dso_local global [1024 x i32] zeroinitializer, align 16 - -; Function Attrs: nounwind uwtable -define dso_local void @foo() #0 { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3 - store i32 0, i32* %1, align 4, !tbaa !2 - store i32 0, i32* %1, align 4, !tbaa !2 - br label %3 - -;