From 65bcb0318b2ea316f4bb578becc877051f61a122 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Fri, 28 Jun 2024 15:16:37 +0100
Subject: [PATCH 1/7] Use LLVM 15 by default, add experimental LLVM 16 support

Changes required to use LLVM 15 by default, and to support LLVM 16
experimentally, include:

CI config:

- Bump all LLVM 14 configurations to use LLVM 15
- Bump the LLVM 15 configurations to use LLVM 16

llvmlite:

- Always set opaque pointers to false (they are the default in both 15
  and 16, unlike 14).
- Don't initialize `ObjCARCOpts` in LLVM 16 and above. This was removed
  in LLVM 16 by:

```
commit 4153f989bab0f2f300fa8d3001ebeef7b6d9672c
Author: Arthur Eubanks
Date:   Sun Oct 2 13:20:21 2022 -0700

    [ObjCARC] Remove legacy PM versions of optimization passes
```

- Remove the `AggressiveInstCombine` and `PruneEH` passes. These were
  removed from the legacy pass manager in LLVM 16 by:

```
commit 70dc3b811e4926fa2c88bd3b53b29c46fcba1a90
Author: Arthur Eubanks
Date:   Mon Oct 31 14:50:38 2022 -0700

    [AggressiveInstCombine] Remove legacy PM pass
```

and

```
commit 46fc75ab28b78a730ea21fd7daba6443937bfaac
Author: Sebastian Peryt
Date:   Mon Sep 26 18:31:32 2022 -0700

    [NFC][2/n] Remove PrunePH pass
```

- Modify `reserveAllocationSpace` in the memory manager to use `Align`
  for the type of alignments in LLVM 16 - this mirrors an upstream
  change.
- Remove LLVM 14-specific code paths (and one vestigial LLVM < 9 path).
- Update the function attributes test to recognize the new form of
  memory attributes - `memory()` as opposed to individual attributes
  like `readonly`. See:
  https://releases.llvm.org/16.0.0/docs/LangRef.html#function-attributes
---
 azure-pipelines.yml                           |  8 ++--
 buildscripts/azure/azure-windows.yml          |  4 +-
 buildscripts/incremental/build.cmd            |  2 +-
 .../incremental/setup_conda_environment.cmd   |  4 +-
 .../incremental/setup_conda_environment.sh    |  6 +--
 ffi/build.py                                  |  8 ++--
 ffi/core.cpp                                  |  4 --
 ffi/initfini.cpp                              |  2 +
 ffi/memorymanager.cpp                         | 39 ++++++++++++-------
 ffi/memorymanager.h                           | 15 +++++--
 ffi/orcjit.cpp                                |  9 -----
 ffi/passmanagers.cpp                          | 26 ++-----------
 ffi/targets.cpp                               |  1 +
 ffi/value.cpp                                 |  2 -
 llvmlite/binding/passmanagers.py              | 35 ++++++++++-------
 llvmlite/tests/test_binding.py                | 34 ++++++++--------
 16 files changed, 98 insertions(+), 101 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 7e40fdedd..75fec8cd3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -18,10 +18,10 @@ jobs:
           PYTHON: '3.12'
           CONDA_ENV: cienv
 
-        llvm15:
+        llvm16:
           PYTHON: '3.12'
           CONDA_ENV: cienv
-          LLVM: '15'
+          LLVM: '16'
 
   - template: buildscripts/azure/azure-linux-macos.yml
     parameters:
@@ -67,10 +67,10 @@ jobs:
           CONDA_ENV: cienv
           WHEEL: 'yes'
 
-        llvm15:
+        llvm16:
           PYTHON: '3.12'
           CONDA_ENV: cienv
-          LLVM: '15'
+          LLVM: '16'
 
   - template: buildscripts/azure/azure-windows.yml
     parameters:
diff --git a/buildscripts/azure/azure-windows.yml b/buildscripts/azure/azure-windows.yml
index fd61bb7a8..5f00cf9c3 100644
--- a/buildscripts/azure/azure-windows.yml
+++ b/buildscripts/azure/azure-windows.yml
@@ -22,10 +22,10 @@ jobs:
         PYTHON: '3.12'
         CONDA_ENV: cienv
 
-      llvm15:
+      llvm16:
         PYTHON: '3.12'
         CONDA_ENV: cienv
-        LLVM: '15'
+        LLVM: '16'
 
 
   steps:
diff --git a/buildscripts/incremental/build.cmd b/buildscripts/incremental/build.cmd
index 182cdde8f..fb3221895 100644
--- a/buildscripts/incremental/build.cmd
+++ b/buildscripts/incremental/build.cmd
@@ -15,7 +15,7 @@ call activate %CONDA_ENV%
 @rem - https://github.com/conda-forge/llvmdev-feedstock/issues/175
 @rem - https://github.com/conda-forge/llvmdev-feedstock/pull/223
@rem - https://github.com/MicrosoftDocs/visualstudio-docs/issues/7774 -if "%LLVM%"=="15" ( +if "%LLVM%"=="16" ( call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\VsDevCmd.bat" if %errorlevel% neq 0 exit /b %errorlevel% ) diff --git a/buildscripts/incremental/setup_conda_environment.cmd b/buildscripts/incremental/setup_conda_environment.cmd index afd47c5f2..ac572f3a7 100644 --- a/buildscripts/incremental/setup_conda_environment.cmd +++ b/buildscripts/incremental/setup_conda_environment.cmd @@ -14,10 +14,10 @@ call activate %CONDA_ENV% if %errorlevel% neq 0 exit /b %errorlevel% @rem Install llvmdev -if "%LLVM%"=="15" ( +if "%LLVM%"=="16" ( set LLVMDEV_CHANNEL="conda-forge" ) else ( - set LLVMDEV_CHANNEL="numba/label/dev" + set LLVMDEV_CHANNEL="numba" ) call conda install -y -q -c %LLVMDEV_CHANNEL% llvmdev="%LLVM%" libxml2 diff --git a/buildscripts/incremental/setup_conda_environment.sh b/buildscripts/incremental/setup_conda_environment.sh index 7dd431848..eec2c0fe7 100755 --- a/buildscripts/incremental/setup_conda_environment.sh +++ b/buildscripts/incremental/setup_conda_environment.sh @@ -27,10 +27,10 @@ source activate $CONDA_ENV set -v # Install llvmdev (separate channel, for now) -if [ "$LLVM" == "15" ]; then - $CONDA_INSTALL -c conda-forge llvmdev="15" +if [ "$LLVM" == "16" ]; then + $CONDA_INSTALL -c conda-forge llvmdev="16" else - $CONDA_INSTALL -c numba/label/dev llvmdev="14.*" + $CONDA_INSTALL -c numba llvmdev="15.*" fi # Install the compiler toolchain, for osx, bootstrapping needed diff --git a/ffi/build.py b/ffi/build.py index 97bdda426..61d68e3bc 100755 --- a/ffi/build.py +++ b/ffi/build.py @@ -167,13 +167,13 @@ def main_posix(kind, library_ext): else: (version, _) = out.split('.', 1) version = int(version) - if version == 15: - msg = ("Building with LLVM 15; note that LLVM 15 support is " + if version == 16: + msg = ("Building with LLVM 16; note that LLVM 16 support is " "presently experimental") show_warning(msg) - elif version != 14: + elif version != 15: - msg = ("Building llvmlite requires LLVM 14, got " + msg = ("Building llvmlite requires LLVM 15, got " "{!r}. 
Be sure to set LLVM_CONFIG to the right executable " "path.\nRead the documentation at " "http://llvmlite.pydata.org/ for more information about " diff --git a/ffi/core.cpp b/ffi/core.cpp index 0a4c5bb10..92fc09d87 100644 --- a/ffi/core.cpp +++ b/ffi/core.cpp @@ -23,18 +23,14 @@ LLVMPY_DisposeString(const char *msg) { free(const_cast(msg)); } API_EXPORT(LLVMContextRef) LLVMPY_GetGlobalContext() { auto context = LLVMGetGlobalContext(); -#if LLVM_VERSION_MAJOR > 14 LLVMContextSetOpaquePointers(context, false); -#endif return context; } API_EXPORT(LLVMContextRef) LLVMPY_ContextCreate() { LLVMContextRef context = LLVMContextCreate(); -#if LLVM_VERSION_MAJOR > 14 LLVMContextSetOpaquePointers(context, false); -#endif return context; } diff --git a/ffi/initfini.cpp b/ffi/initfini.cpp index ae13d292a..dc05e6724 100644 --- a/ffi/initfini.cpp +++ b/ffi/initfini.cpp @@ -15,7 +15,9 @@ extern "C" { INIT(Core) INIT(TransformUtils) INIT(ScalarOpts) +#if LLVM_VERSION_MAJOR < 16 INIT(ObjCARCOpts) +#endif INIT(Vectorization) INIT(InstCombine) INIT(IPO) diff --git a/ffi/memorymanager.cpp b/ffi/memorymanager.cpp index 3163e2430..3f3ee79c7 100644 --- a/ffi/memorymanager.cpp +++ b/ffi/memorymanager.cpp @@ -129,20 +129,23 @@ bool LlvmliteMemoryManager::hasSpace(const MemoryGroup &MemGroup, return false; } -void LlvmliteMemoryManager::reserveAllocationSpace( - uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, - uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) { +void LlvmliteMemoryManager::reserveAllocationSpace(uintptr_t CodeSize, + LLVMLITE_ALIGN CodeAlign, + uintptr_t RODataSize, + LLVMLITE_ALIGN RODataAlign, + uintptr_t RWDataSize, + LLVMLITE_ALIGN RWDataAlign) { LLVM_DEBUG( dbgs() << "\nLlvmliteMemoryManager::reserveAllocationSpace() request:\n\n"); LLVM_DEBUG(dbgs() << "Code size / align: " << format_hex(CodeSize, 2, true) - << " / " << CodeAlign << "\n"); + << " / " << GET_ALIGN_VALUE(CodeAlign) << "\n"); LLVM_DEBUG(dbgs() << "ROData size / align: " - << format_hex(RODataSize, 2, true) << " / " << RODataAlign - << "\n"); + << format_hex(RODataSize, 2, true) << " / " + << GET_ALIGN_VALUE(RODataAlign) << "\n"); LLVM_DEBUG(dbgs() << "RWData size / align: " - << format_hex(RWDataSize, 2, true) << " / " << RWDataAlign - << "\n"); + << format_hex(RWDataSize, 2, true) << " / " + << GET_ALIGN_VALUE(RWDataAlign) << "\n"); if (CodeSize == 0 && RODataSize == 0 && RWDataSize == 0) { LLVM_DEBUG(dbgs() << "No memory requested - returning early.\n"); @@ -152,23 +155,31 @@ void LlvmliteMemoryManager::reserveAllocationSpace( // Code alignment needs to be at least the stub alignment - however, we // don't have an easy way to get that here so as a workaround, we assume // it's 8, which is the largest value I observed across all platforms. +#if LLVM_VERSION_MAJOR < 16 constexpr uint32_t StubAlign = 8; - CodeAlign = std::max(CodeAlign, StubAlign); +#else + constexpr uint64_t StubAlign = 8; +#endif + + CodeAlign = LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(CodeAlign), StubAlign)); // ROData and RWData may not need to be aligned to the StubAlign, but the // stub alignment seems like a reasonable (if slightly arbitrary) minimum // alignment for them that should not cause any issues on all (i.e. 64-bit) // platforms. 
- RODataAlign = std::max(RODataAlign, StubAlign); - RWDataAlign = std::max(RWDataAlign, StubAlign); + RODataAlign = + LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(RODataAlign), StubAlign)); + RWDataAlign = + LLVMLITE_ALIGN(std::max(GET_ALIGN_VALUE(RWDataAlign), StubAlign)); // Get space required for each section. Use the same calculation as // allocateSection because we need to be able to satisfy it. - uintptr_t RequiredCodeSize = alignTo(CodeSize, CodeAlign) + CodeAlign; + uintptr_t RequiredCodeSize = + alignTo(CodeSize, CodeAlign) + GET_ALIGN_VALUE(CodeAlign); uintptr_t RequiredRODataSize = - alignTo(RODataSize, RODataAlign) + RODataAlign; + alignTo(RODataSize, RODataAlign) + GET_ALIGN_VALUE(RODataAlign); uintptr_t RequiredRWDataSize = - alignTo(RWDataSize, RWDataAlign) + RWDataAlign; + alignTo(RWDataSize, RWDataAlign) + GET_ALIGN_VALUE(RWDataAlign); uint64_t TotalSize = RequiredCodeSize + RequiredRODataSize + RequiredRWDataSize; diff --git a/ffi/memorymanager.h b/ffi/memorymanager.h index 9ed028542..c0bdddaab 100644 --- a/ffi/memorymanager.h +++ b/ffi/memorymanager.h @@ -42,6 +42,14 @@ class __attribute__((visibility("default"))) ErrorInfoBase; #include #include +#if LLVM_VERSION_MAJOR < 16 +#define LLVMLITE_ALIGN uint32_t +#define GET_ALIGN_VALUE(align) align +#else +#define LLVMLITE_ALIGN Align +#define GET_ALIGN_VALUE(align) align.value() +#endif + namespace llvm { /// This is a simple memory manager which implements the methods called by @@ -174,11 +182,12 @@ class API_EXPORT(LlvmliteMemoryManager : public RTDyldMemoryManager) { virtual bool needsToReserveAllocationSpace() override { return true; } - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + virtual void reserveAllocationSpace(uintptr_t CodeSize, + LLVMLITE_ALIGN CodeAlign, uintptr_t RODataSize, - uint32_t RODataAlign, + LLVMLITE_ALIGN RODataAlign, uintptr_t RWDataSize, - uint32_t RWDataAlign) override; + LLVMLITE_ALIGN RWDataAlign) override; private: struct FreeMemBlock { diff --git a/ffi/orcjit.cpp b/ffi/orcjit.cpp index 60a4d8f3a..e19d6b97d 100644 --- a/ffi/orcjit.cpp +++ b/ffi/orcjit.cpp @@ -158,12 +158,7 @@ LLVMPY_LLJITLookup(std::shared_ptr *lljit, const char *dylib_name, return nullptr; } -#if LLVM_VERSION_MAJOR > 14 *addr = sym->getValue(); -#else - *addr = sym->getAddress(); -#endif - return new JITDylibTracker(*lljit, *dylib, std::move(dylib->createResourceTracker())); } @@ -339,11 +334,7 @@ LLVMPY_LLJIT_Link(std::shared_ptr *lljit, const char *libraryName, LLVMDisposeErrorMessage(message); return nullptr; } -#if LLVM_VERSION_MAJOR > 14 exports[export_idx].address = lookup->getValue(); -#else - exports[export_idx].address = lookup->getAddress(); -#endif } return new JITDylibTracker(*lljit, *dylib, std::move(dylib->getDefaultResourceTracker())); diff --git a/ffi/passmanagers.cpp b/ffi/passmanagers.cpp index da4a076b4..3e3c1c1bb 100644 --- a/ffi/passmanagers.cpp +++ b/ffi/passmanagers.cpp @@ -162,13 +162,8 @@ LLVMPY_AddCallGraphDOTPrinterPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddDotDomPrinterPass(LLVMPassManagerRef PM, bool showBody) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(showBody ? llvm::createDomPrinterWrapperPassPass() : llvm::createDomOnlyPrinterWrapperPassPass()); -#else - unwrap(PM)->add(showBody ? 
llvm::createDomPrinterPass() - : llvm::createDomOnlyPrinterPass()); -#endif } API_EXPORT(void) @@ -178,13 +173,8 @@ LLVMPY_AddGlobalsModRefAAPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddDotPostDomPrinterPass(LLVMPassManagerRef PM, bool showBody) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(showBody ? llvm::createPostDomPrinterWrapperPassPass() : llvm::createPostDomOnlyPrinterWrapperPassPass()); -#else - unwrap(PM)->add(showBody ? llvm::createPostDomPrinterPass() - : llvm::createPostDomOnlyPrinterPass()); -#endif } API_EXPORT(void) @@ -255,13 +245,6 @@ LLVMPY_AddAlwaysInlinerPass(LLVMPassManagerRef PM, bool insertLifetime) { unwrap(PM)->add(llvm::createAlwaysInlinerLegacyPass(insertLifetime)); } -#if LLVM_VERSION_MAJOR < 15 -API_EXPORT(void) -LLVMPY_AddArgPromotionPass(LLVMPassManagerRef PM, unsigned int maxElements) { - unwrap(PM)->add(llvm::createArgumentPromotionPass(maxElements)); -} -#endif - API_EXPORT(void) LLVMPY_AddBreakCriticalEdgesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(llvm::createBreakCriticalEdgesPass()); @@ -293,10 +276,12 @@ LLVMPY_AddDeadCodeEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadCodeEliminationPass()); } +#if LLVM_VERSION_MAJOR < 16 API_EXPORT(void) LLVMPY_AddAggressiveInstructionCombiningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAggressiveInstCombinerPass()); } +#endif API_EXPORT(void) LLVMPY_AddInternalizePass(LLVMPassManagerRef PM) { @@ -349,12 +334,7 @@ LLVMPY_AddLoopUnrollAndJamPass(LLVMPassManagerRef PM) { API_EXPORT(void) LLVMPY_AddLoopUnswitchPass(LLVMPassManagerRef PM, bool optimizeForSize, bool hasBranchDivergence) { -#if LLVM_VERSION_MAJOR > 14 unwrap(PM)->add(createSimpleLoopUnswitchLegacyPass(optimizeForSize)); -#else - unwrap(PM)->add( - createLoopUnswitchPass(optimizeForSize, hasBranchDivergence)); -#endif } API_EXPORT(void) @@ -392,10 +372,12 @@ LLVMPY_AddPartialInliningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPartialInliningPass()); } +#if LLVM_VERSION_MAJOR < 16 API_EXPORT(void) LLVMPY_AddPruneExceptionHandlingPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } +#endif API_EXPORT(void) LLVMPY_AddReassociatePass(LLVMPassManagerRef PM) { diff --git a/ffi/targets.cpp b/ffi/targets.cpp index 1b1bbf9f1..da7ba521c 100644 --- a/ffi/targets.cpp +++ b/ffi/targets.cpp @@ -1,6 +1,7 @@ #include "core.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/ffi/value.cpp b/ffi/value.cpp index 2f86806bd..b59e33ae0 100644 --- a/ffi/value.cpp +++ b/ffi/value.cpp @@ -6,9 +6,7 @@ // the following is needed for WriteGraph() #include "llvm/Analysis/CFGPrinter.h" -#if LLVM_VERSION_MAJOR > 14 #include "llvm/Support/GraphWriter.h" -#endif /* An iterator around a attribute list, including the stop condition */ struct AttributeListIterator { diff --git a/llvmlite/binding/passmanagers.py b/llvmlite/binding/passmanagers.py index af6152f63..92aa06738 100644 --- a/llvmlite/binding/passmanagers.py +++ b/llvmlite/binding/passmanagers.py @@ -1,4 +1,4 @@ -from ctypes import (c_bool, c_char_p, c_int, c_size_t, c_uint, Structure, byref, +from ctypes import (c_bool, c_char_p, c_int, c_size_t, Structure, byref, POINTER) from collections import namedtuple from enum import IntFlag @@ -8,11 +8,11 @@ from tempfile import mkstemp from llvmlite.binding.common import _encode_string +llvm_version_major = llvm_version_info[0] + _prunestats = 
namedtuple('PruneStats', ('basicblock diamond fanout fanout_raise')) -llvm_version_major = llvm_version_info[0] - class PruneStats(_prunestats): """ Holds statistics from reference count pruning. @@ -261,9 +261,7 @@ def add_arg_promotion_pass(self, max_elements=3): LLVM 14: `llvm::createArgumentPromotionPass` """ # noqa E501 - if llvm_version_major > 14: - raise RuntimeError('ArgumentPromotionPass unavailable in LLVM > 14') - ffi.lib.LLVMPY_AddArgPromotionPass(self, max_elements) + raise RuntimeError('ArgumentPromotionPass unavailable in LLVM > 14') def add_break_critical_edges_pass(self): """ @@ -342,6 +340,10 @@ def add_aggressive_instruction_combining_pass(self): LLVM 14: `llvm::createAggressiveInstCombinerPass` """ # noqa E501 + if llvm_version_major > 15: + msg = "AggressiveInstrCombinerPass unavailable in LLVM > 15" + raise RuntimeError(msg) + ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass(self) def add_internalize_pass(self): @@ -538,6 +540,8 @@ def add_prune_exception_handling_pass(self): LLVM 14: `llvm::createPruneEHPass` """ # noqa E501 + if llvm_version_major > 15: + raise RuntimeError("PruneEHPass unavailable in LLVM > 15") ffi.lib.LLVMPY_AddPruneExceptionHandlingPass(self) def add_reassociate_expressions_pass(self): @@ -871,18 +875,16 @@ def run_with_remarks(self, function, remarks_format='yaml', ffi.lib.LLVMPY_AddScalarEvolutionAAPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddAggressiveDCEPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddAlwaysInlinerPass.argtypes = [ffi.LLVMPassManagerRef, c_bool] - -if llvm_version_major < 15: - ffi.lib.LLVMPY_AddArgPromotionPass.argtypes = [ - ffi.LLVMPassManagerRef, c_uint] - ffi.lib.LLVMPY_AddBreakCriticalEdgesPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddDeadStoreEliminationPass.argtypes = [ ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddReversePostOrderFunctionAttrsPass.argtypes = [ ffi.LLVMPassManagerRef] -ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass.argtypes = [ - ffi.LLVMPassManagerRef] + +if llvm_version_major < 16: + ffi.lib.LLVMPY_AddAggressiveInstructionCombiningPass.argtypes = [ + ffi.LLVMPassManagerRef] + ffi.lib.LLVMPY_AddInternalizePass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddLCSSAPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddLoopDeletionPass.argtypes = [ffi.LLVMPassManagerRef] @@ -901,7 +903,12 @@ def run_with_remarks(self, function, remarks_format='yaml', ffi.lib.LLVMPY_AddMergeFunctionsPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddMergeReturnsPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddPartialInliningPass.argtypes = [ffi.LLVMPassManagerRef] -ffi.lib.LLVMPY_AddPruneExceptionHandlingPass.argtypes = [ffi.LLVMPassManagerRef] + +if llvm_version_major < 16: + ffi.lib.LLVMPY_AddPruneExceptionHandlingPass.argtypes = [ + ffi.LLVMPassManagerRef + ] + ffi.lib.LLVMPY_AddReassociatePass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddDemoteRegisterToMemoryPass.argtypes = [ffi.LLVMPassManagerRef] ffi.lib.LLVMPY_AddSinkPass.argtypes = [ffi.LLVMPassManagerRef] diff --git a/llvmlite/tests/test_binding.py b/llvmlite/tests/test_binding.py index cfa6bdeae..0dfbeede9 100644 --- a/llvmlite/tests/test_binding.py +++ b/llvmlite/tests/test_binding.py @@ -18,8 +18,6 @@ from llvmlite.binding import ffi from llvmlite.tests import TestCase -llvm_version_major = llvm.llvm_version_info[0] - # arvm7l needs extra ABI symbols to link successfully if platform.machine() == 'armv7l': llvm.load_library_permanently('libgcc_s.so.1') @@ -883,7 +881,7 @@ def 
test_set_option(self): def test_version(self): major, minor, patch = llvm.llvm_version_info # one of these can be valid - valid = (14, 15) + valid = (15, 16) self.assertIn(major, valid) self.assertIn(patch, range(8)) @@ -1082,13 +1080,9 @@ def test_parse_bitcode_error(self): with self.assertRaises(RuntimeError) as cm: llvm.parse_bitcode(b"") self.assertIn("LLVM bitcode parsing error", str(cm.exception)) - # for llvm < 9 - if llvm.llvm_version_info[0] < 9: - self.assertIn("Invalid bitcode signature", str(cm.exception)) - else: - self.assertIn( - "file too small to contain bitcode header", str(cm.exception), - ) + self.assertIn( + "file too small to contain bitcode header", str(cm.exception), + ) def test_bitcode_roundtrip(self): # create a new context to avoid struct renaming @@ -1709,11 +1703,13 @@ def test_instruction_operands(self): self.assertEqual(str(operands[1].type), 'i32') def test_function_attributes(self): + ver = llvm.llvm_version_info[0] + readonly_attrs = [b'memory(read)' if ver == 16 else b'readonly'] mod = self.module(asm_attributes) for func in mod.functions: attrs = list(func.attributes) if func.name == 'a_readonly_func': - self.assertEqual(attrs, [b'readonly']) + self.assertEqual(attrs, readonly_attrs) elif func.name == 'a_arg0_return_func': self.assertEqual(attrs, []) args = list(func.arguments) @@ -2462,6 +2458,8 @@ def pm(self): return llvm.create_module_pass_manager() def test_populate(self): + llvm_ver = llvm.llvm_version_info[0] + pm = self.pm() pm.add_target_library_info("") # unspecified target triple pm.add_constant_merge_pass() @@ -2486,12 +2484,13 @@ def test_populate(self): pm.add_aggressive_dead_code_elimination_pass() pm.add_aa_eval_pass() pm.add_always_inliner_pass() - if llvm_version_major < 15: - pm.add_arg_promotion_pass(42) pm.add_break_critical_edges_pass() pm.add_dead_store_elimination_pass() pm.add_reverse_post_order_function_attrs_pass() - pm.add_aggressive_instruction_combining_pass() + + if llvm_ver < 16: + pm.add_aggressive_instruction_combining_pass() + pm.add_internalize_pass() pm.add_jump_threading_pass(7) pm.add_lcssa_pass() @@ -2502,8 +2501,6 @@ def test_populate(self): pm.add_loop_simplification_pass() pm.add_loop_unroll_pass() pm.add_loop_unroll_and_jam_pass() - if llvm_version_major < 15: - pm.add_loop_unswitch_pass() pm.add_lower_atomic_pass() pm.add_lower_invoke_pass() pm.add_lower_switch_pass() @@ -2511,7 +2508,10 @@ def test_populate(self): pm.add_merge_functions_pass() pm.add_merge_returns_pass() pm.add_partial_inlining_pass() - pm.add_prune_exception_handling_pass() + + if llvm_ver < 16: + pm.add_prune_exception_handling_pass() + pm.add_reassociate_expressions_pass() pm.add_demote_register_to_memory_pass() pm.add_sink_pass() From d75760d228b16b5587a634c2cf335f3a29dc6f3d Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:15:37 +0100 Subject: [PATCH 2/7] Try setting CMake C++ standard to 17 to fix Windows LLVM 16 build --- ffi/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ffi/CMakeLists.txt b/ffi/CMakeLists.txt index 907b1e1ec..c1b7b2315 100755 --- a/ffi/CMakeLists.txt +++ b/ffi/CMakeLists.txt @@ -11,6 +11,8 @@ if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -g") endif() +set(CMAKE_CXX_STANDARD 17) + # Work around llvm/llvm-project#83802 - LLVM's Findzstd.cmake uses variables # that require including `GNUInstallDirs`, but it does not include it itself. 
include(GNUInstallDirs) From f3564f3c27bf8e0c14c0fb404f32b99435463bd5 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:41:15 +0100 Subject: [PATCH 3/7] Use LLVM 15 on RTD --- .readthedocs.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 43152be07..9b42aaaeb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,6 +8,11 @@ build: os: ubuntu-22.04 tools: python: "3.11" + apt_packages: + - llvm-15 + jobs: + pre_build: + - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 sphinx: configuration: docs/source/conf.py From 587c885f7701ec40637ebbf42e496d1b5adfc641 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 14:48:19 +0100 Subject: [PATCH 4/7] Set LLVM version to 15 in post_system_dependencies on RTD --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9b42aaaeb..b729cfff7 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,7 @@ build: apt_packages: - llvm-15 jobs: - pre_build: + post_system_dependencies: - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 sphinx: From 29fcbb86ed857f42ffec1ed4013f9e4406fc9973 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Fri, 5 Jul 2024 15:09:10 +0100 Subject: [PATCH 5/7] Try editing build.py on RTD instead --- .readthedocs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b729cfff7..9892bd5eb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,8 +11,8 @@ build: apt_packages: - llvm-15 jobs: - post_system_dependencies: - - update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-15 200 + post_checkout: + - sed -i "s/'llvm-config'/'llvm-config-15'/g" ffi/build.py sphinx: configuration: docs/source/conf.py From 98452e2a63b286133b3d712c304e485ca289c666 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 15 Jul 2024 11:55:15 +0100 Subject: [PATCH 6/7] Remove llvmdev 14 recipe Replace with llvmdev recipe for LLVM 15. 
--- conda-recipes/llvm14-clear-gotoffsetmap.patch | 31 - .../llvm14-remove-use-of-clonefile.patch | 54 - conda-recipes/llvm14-svml.patch | 2194 ----------------- conda-recipes/llvmdev/bld.bat | 135 +- conda-recipes/llvmdev/build.sh | 172 +- conda-recipes/llvmdev/conda_build_config.yaml | 8 + conda-recipes/llvmdev/meta.yaml | 62 +- ...std-module-for-shared-DLL-on-Windows.patch | 0 .../patches/no-windows-symlinks.patch | 0 conda-recipes/llvmdev_llvm15/bld.bat | 59 - conda-recipes/llvmdev_llvm15/build.sh | 114 - .../llvmdev_llvm15/conda_build_config.yaml | 20 - conda-recipes/llvmdev_llvm15/meta.yaml | 77 - conda-recipes/llvmdev_llvm15/numba-3016.ll | 80 - 14 files changed, 179 insertions(+), 2827 deletions(-) delete mode 100644 conda-recipes/llvm14-clear-gotoffsetmap.patch delete mode 100644 conda-recipes/llvm14-remove-use-of-clonefile.patch delete mode 100644 conda-recipes/llvm14-svml.patch rename conda-recipes/{llvmdev_llvm15 => llvmdev}/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch (100%) rename conda-recipes/{llvmdev_llvm15 => llvmdev}/patches/no-windows-symlinks.patch (100%) delete mode 100644 conda-recipes/llvmdev_llvm15/bld.bat delete mode 100644 conda-recipes/llvmdev_llvm15/build.sh delete mode 100644 conda-recipes/llvmdev_llvm15/conda_build_config.yaml delete mode 100644 conda-recipes/llvmdev_llvm15/meta.yaml delete mode 100644 conda-recipes/llvmdev_llvm15/numba-3016.ll diff --git a/conda-recipes/llvm14-clear-gotoffsetmap.patch b/conda-recipes/llvm14-clear-gotoffsetmap.patch deleted file mode 100644 index 239f4ab20..000000000 --- a/conda-recipes/llvm14-clear-gotoffsetmap.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 322c79fff224389b4df9f24ac22965867007c2fa Mon Sep 17 00:00:00 2001 -From: Graham Markall -Date: Mon, 13 Mar 2023 21:35:11 +0000 -Subject: [PATCH] RuntimeDyldELF: Clear the GOTOffsetMap when finalizing the - load - -This needs resetting so that stale entries are not left behind when the -GOT section and index are reset. - -See llvm/llvm#61402: RuntimeDyldELF doesn't clear GOTOffsetMap in -finalizeLoad(), leading to invalid GOT relocations on AArch64 - -https://github.com/llvm/llvm-project/issues/61402. 
---- - llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -index f92618afdff6..eb3c27a9406a 100644 ---- a/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -+++ b/llvm-14.0.6.src/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -@@ -2345,6 +2345,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, - } - } - -+ GOTOffsetMap.clear(); - GOTSectionID = 0; - CurrentGOTIndex = 0; - --- -2.34.1 - diff --git a/conda-recipes/llvm14-remove-use-of-clonefile.patch b/conda-recipes/llvm14-remove-use-of-clonefile.patch deleted file mode 100644 index 6ef9c9d61..000000000 --- a/conda-recipes/llvm14-remove-use-of-clonefile.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff -ur a/llvm-14.0.6.src/lib/Support/Unix/Path.inc b/llvm-14.0.6.src/lib/Support/Unix/Path.inc ---- a/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-03-14 05:44:55.000000000 -0400 -+++ b/llvm-14.0.6.src/lib/Support/Unix/Path.inc 2022-09-19 11:30:59.000000000 -0400 -@@ -1462,6 +1462,7 @@ - std::error_code copy_file(const Twine &From, const Twine &To) { - std::string FromS = From.str(); - std::string ToS = To.str(); -+ /* - #if __has_builtin(__builtin_available) - if (__builtin_available(macos 10.12, *)) { - // Optimistically try to use clonefile() and handle errors, rather than -@@ -1490,6 +1491,7 @@ - // cheaper. - } - #endif -+ */ - if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA)) - return std::error_code(); - return std::error_code(errno, std::generic_category()); -diff -ur a/llvm-14.0.6.src/unittests/Support/Path.cpp b/llvm-14.0.6.src/unittests/Support/Path.cpp ---- a/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-03-14 05:44:55.000000000 -0400 -+++ b/llvm-14.0.6.src/unittests/Support/Path.cpp 2022-09-19 11:33:07.000000000 -0400 -@@ -2267,15 +2267,15 @@ - - EXPECT_EQ(fs::setPermissions(TempPath, fs::set_uid_on_exe), NoError); - EXPECT_TRUE(CheckPermissions(fs::set_uid_on_exe)); -- -+#if !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::set_gid_on_exe), NoError); - EXPECT_TRUE(CheckPermissions(fs::set_gid_on_exe)); -- -+#endif - // Modern BSDs require root to set the sticky bit on files. - // AIX and Solaris without root will mask off (i.e., lose) the sticky bit - // on files. 
- #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && \ -- !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) -+ !defined(_AIX) && !(defined(__sun__) && defined(__svr4__)) && !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::sticky_bit), NoError); - EXPECT_TRUE(CheckPermissions(fs::sticky_bit)); - -@@ -2297,10 +2297,12 @@ - EXPECT_TRUE(CheckPermissions(fs::all_perms)); - #endif // !FreeBSD && !NetBSD && !OpenBSD && !AIX - -+#if !defined(__APPLE__) - EXPECT_EQ(fs::setPermissions(TempPath, fs::all_perms & ~fs::sticky_bit), - NoError); - EXPECT_TRUE(CheckPermissions(fs::all_perms & ~fs::sticky_bit)); - #endif -+#endif - } - - #ifdef _WIN32 diff --git a/conda-recipes/llvm14-svml.patch b/conda-recipes/llvm14-svml.patch deleted file mode 100644 index c753d3f59..000000000 --- a/conda-recipes/llvm14-svml.patch +++ /dev/null @@ -1,2194 +0,0 @@ -From 9de32f5474f1f78990b399214bdbb6c21f8f098e Mon Sep 17 00:00:00 2001 -From: Ivan Butygin -Date: Sun, 24 Jul 2022 20:31:29 +0200 -Subject: [PATCH] Fixes vectorizer and extends SVML support - -Fixes vectorizer and extends SVML support -Patch was updated to fix SVML calling convention issues uncovered by llvm 10. -In previous versions of patch SVML calling convention was selected based on -compilation settings. So if you try to call 256bit vector function from avx512 -code function will be called with avx512 cc which is incorrect. To fix this -SVML cc was separated into 3 different cc for 128, 256 and 512bit vector lengths -which are selected based on actual input vector length. - -Original patch merged several fixes: - -1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls -to SVML library as it has non-standard calling conventions. So accordingly it -has SVML calling conventions definitions and code to set CC to the vectorized -calls. As SVML provides several implementations for the math functions we also -took into consideration fast attribute and select more fast implementation in -such case. This work is based on original Matt Masten's work. -Author: Denis Nagorny - -2. https://reviews.llvm.org/D53035 patch implements support to legalize SVML -calls by breaking down the illegal vector call instruction into multiple legal -vector call instructions during code generation. Currently the vectorizer does -not check legality of the generated SVML (or any VECLIB) call instructions, and -this can lead to potential problems even during vector type legalization. This -patch addresses this issue by adding a legality check during code generation and -replaces the illegal SVML call with corresponding legalized instructions. -(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html) -Author: Karthik Senthil - -diff --git a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -index 17d1e3f770c14..110ff08189867 100644 ---- a/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -+++ b/llvm-14.0.6.src/include/llvm/Analysis/TargetLibraryInfo.h -@@ -39,6 +39,12 @@ struct VecDesc { - NotLibFunc - }; - -+enum SVMLAccuracy { -+ SVML_DEFAULT, -+ SVML_HA, -+ SVML_EP -+}; -+ - /// Implementation of the target library information. - /// - /// This class constructs tables that hold the target library information and -@@ -157,7 +163,7 @@ class TargetLibraryInfoImpl { - /// Return true if the function F has a vector equivalent with vectorization - /// factor VF. 
- bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { -- return !getVectorizedFunction(F, VF).empty(); -+ return !getVectorizedFunction(F, VF, false).empty(); - } - - /// Return true if the function F has a vector equivalent with any -@@ -166,7 +172,10 @@ class TargetLibraryInfoImpl { - - /// Return the name of the equivalent of F, vectorized with factor VF. If no - /// such mapping exists, return the empty string. -- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const; -+ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const; -+ -+ Optional getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const; - - /// Set to true iff i32 parameters to library functions should have signext - /// or zeroext attributes if they correspond to C-level int or unsigned int, -@@ -326,8 +335,13 @@ class TargetLibraryInfo { - bool isFunctionVectorizable(StringRef F) const { - return Impl->isFunctionVectorizable(F); - } -- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { -- return Impl->getVectorizedFunction(F, VF); -+ std::string getVectorizedFunction(StringRef F, const ElementCount &VF, bool IsFast) const { -+ return Impl->getVectorizedFunction(F, VF, IsFast); -+ } -+ -+ Optional getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { -+ return Impl->getVectorizedFunctionCallingConv(F, FTy, DL); - } - - /// Tests if the function is both available and a candidate for optimized code -diff --git a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -index 78ebb35e0ea4d..3ffb57db8b18b 100644 ---- a/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -+++ b/llvm-14.0.6.src/include/llvm/AsmParser/LLToken.h -@@ -133,6 +133,9 @@ enum Kind { - kw_fastcc, - kw_coldcc, - kw_intel_ocl_bicc, -+ kw_intel_svmlcc128, -+ kw_intel_svmlcc256, -+ kw_intel_svmlcc512, - kw_cfguard_checkcc, - kw_x86_stdcallcc, - kw_x86_fastcallcc, -diff --git a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -index 0498fc269b634..23bb3de41bc1a 100644 ---- a/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -+++ b/llvm-14.0.6.src/include/llvm/IR/CMakeLists.txt -@@ -20,3 +20,7 @@ tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) - tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) - tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) - add_public_tablegen_target(intrinsics_gen) -+ -+set(LLVM_TARGET_DEFINITIONS SVML.td) -+tablegen(LLVM SVML.inc -gen-svml) -+add_public_tablegen_target(svml_gen) -diff --git a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -index fd28542465225..096eea1a8e19b 100644 ---- a/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -+++ b/llvm-14.0.6.src/include/llvm/IR/CallingConv.h -@@ -252,6 +252,11 @@ namespace CallingConv { - /// M68k_INTR - Calling convention used for M68k interrupt routines. - M68k_INTR = 101, - -+ /// Intel_SVML - Calling conventions for Intel Short Math Vector Library -+ Intel_SVML128 = 102, -+ Intel_SVML256 = 103, -+ Intel_SVML512 = 104, -+ - /// The highest possible calling convention ID. Must be some 2^k - 1. 
- MaxID = 1023 - }; -diff --git a/llvm-14.0.6.src/include/llvm/IR/SVML.td b/llvm-14.0.6.src/include/llvm/IR/SVML.td -new file mode 100644 -index 0000000000000..5af710404c9d9 ---- /dev/null -+++ b/llvm-14.0.6.src/include/llvm/IR/SVML.td -@@ -0,0 +1,62 @@ -+//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file is used by TableGen to define the different typs of SVML function -+// variants used with -fveclib=SVML. -+// -+//===----------------------------------------------------------------------===// -+ -+class SvmlVariant; -+ -+def sin : SvmlVariant; -+def cos : SvmlVariant; -+def pow : SvmlVariant; -+def exp : SvmlVariant; -+def log : SvmlVariant; -+def acos : SvmlVariant; -+def acosh : SvmlVariant; -+def asin : SvmlVariant; -+def asinh : SvmlVariant; -+def atan2 : SvmlVariant; -+def atan : SvmlVariant; -+def atanh : SvmlVariant; -+def cbrt : SvmlVariant; -+def cdfnorm : SvmlVariant; -+def cdfnorminv : SvmlVariant; -+def cosd : SvmlVariant; -+def cosh : SvmlVariant; -+def erf : SvmlVariant; -+def erfc : SvmlVariant; -+def erfcinv : SvmlVariant; -+def erfinv : SvmlVariant; -+def exp10 : SvmlVariant; -+def exp2 : SvmlVariant; -+def expm1 : SvmlVariant; -+def hypot : SvmlVariant; -+def invsqrt : SvmlVariant; -+def log10 : SvmlVariant; -+def log1p : SvmlVariant; -+def log2 : SvmlVariant; -+def sind : SvmlVariant; -+def sinh : SvmlVariant; -+def sqrt : SvmlVariant; -+def tan : SvmlVariant; -+def tanh : SvmlVariant; -+ -+// TODO: SVML does not currently provide _ha and _ep variants of these fucnctions. -+// We should call the default variant of these functions in all cases instead. -+ -+// def nearbyint : SvmlVariant; -+// def logb : SvmlVariant; -+// def floor : SvmlVariant; -+// def fmod : SvmlVariant; -+// def ceil : SvmlVariant; -+// def trunc : SvmlVariant; -+// def rint : SvmlVariant; -+// def round : SvmlVariant; -diff --git a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -index aec84124129f4..98286e166fbe2 100644 ---- a/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -+++ b/llvm-14.0.6.src/lib/Analysis/CMakeLists.txt -@@ -150,6 +150,7 @@ add_llvm_component_library(LLVMAnalysis - DEPENDS - intrinsics_gen - ${MLDeps} -+ svml_gen - - LINK_LIBS - ${MLLinkDeps} -diff --git a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -index 02923c2c7eb14..83abde28a62a4 100644 ---- a/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -+++ b/llvm-14.0.6.src/lib/Analysis/TargetLibraryInfo.cpp -@@ -110,6 +110,11 @@ bool TargetLibraryInfoImpl::isCallingConvCCompatible(Function *F) { - F->getFunctionType()); - } - -+static std::string svmlMangle(StringRef FnName, const bool IsFast) { -+ std::string FullName = FnName.str(); -+ return IsFast ? FullName : FullName + "_ha"; -+} -+ - /// Initialize the set of available library functions based on the specified - /// target triple. This should be carefully written so that a missing target - /// triple gets a sane set of defaults. 
-@@ -1876,8 +1881,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( - } - case SVML: { - const VecDesc VecFuncs[] = { -- #define TLI_DEFINE_SVML_VECFUNCS -- #include "llvm/Analysis/VecFuncs.def" -+ #define GET_SVML_VARIANTS -+ #include "llvm/IR/SVML.inc" -+ #undef GET_SVML_VARIANTS - }; - addVectorizableFunctions(VecFuncs); - break; -@@ -1897,20 +1903,51 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { - return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; - } - --StringRef --TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -- const ElementCount &VF) const { -+std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -+ const ElementCount &VF, -+ bool IsFast) const { -+ bool FromSVML = ClVectorLibrary == SVML; - F = sanitizeFunctionName(F); - if (F.empty()) -- return F; -+ return F.str(); - std::vector::const_iterator I = - llvm::lower_bound(VectorDescs, F, compareWithScalarFnName); - while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { -- if (I->VectorizationFactor == VF) -- return I->VectorFnName; -+ if (I->VectorizationFactor == VF) { -+ if (FromSVML) { -+ return svmlMangle(I->VectorFnName, IsFast); -+ } -+ return I->VectorFnName.str(); -+ } - ++I; - } -- return StringRef(); -+ return std::string(); -+} -+ -+static CallingConv::ID getSVMLCallingConv(const DataLayout &DL, const FunctionType &FType) -+{ -+ assert(isa(FType.getReturnType())); -+ auto *VecCallRetType = cast(FType.getReturnType()); -+ auto TypeBitWidth = DL.getTypeSizeInBits(VecCallRetType); -+ if (TypeBitWidth == 128) { -+ return CallingConv::Intel_SVML128; -+ } else if (TypeBitWidth == 256) { -+ return CallingConv::Intel_SVML256; -+ } else if (TypeBitWidth == 512) { -+ return CallingConv::Intel_SVML512; -+ } else { -+ llvm_unreachable("Invalid vector width"); -+ } -+ return 0; // not reachable -+} -+ -+Optional -+TargetLibraryInfoImpl::getVectorizedFunctionCallingConv( -+ StringRef F, const FunctionType &FTy, const DataLayout &DL) const { -+ if (F.startswith("__svml")) { -+ return getSVMLCallingConv(DL, FTy); -+ } -+ return {}; - } - - TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F, -diff --git a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -index e3bf41c9721b6..4f9dccd4e0724 100644 ---- a/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -+++ b/llvm-14.0.6.src/lib/AsmParser/LLLexer.cpp -@@ -603,6 +603,9 @@ lltok::Kind LLLexer::LexIdentifier() { - KEYWORD(spir_kernel); - KEYWORD(spir_func); - KEYWORD(intel_ocl_bicc); -+ KEYWORD(intel_svmlcc128); -+ KEYWORD(intel_svmlcc256); -+ KEYWORD(intel_svmlcc512); - KEYWORD(x86_64_sysvcc); - KEYWORD(win64cc); - KEYWORD(x86_regcallcc); -diff --git a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -index 432ec151cf8ae..3bd6ee61024b8 100644 ---- a/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -+++ b/llvm-14.0.6.src/lib/AsmParser/LLParser.cpp -@@ -1781,6 +1781,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { - /// ::= 'ccc' - /// ::= 'fastcc' - /// ::= 'intel_ocl_bicc' -+/// ::= 'intel_svmlcc128' -+/// ::= 'intel_svmlcc256' -+/// ::= 'intel_svmlcc512' - /// ::= 'coldcc' - /// ::= 'cfguard_checkcc' - /// ::= 'x86_stdcallcc' -@@ -1850,6 +1853,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { - case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; - case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; - case lltok::kw_intel_ocl_bicc: CC = 
CallingConv::Intel_OCL_BI; break; -+ case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break; -+ case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break; -+ case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break; - case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; - case lltok::kw_win64cc: CC = CallingConv::Win64; break; - case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; -diff --git a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -index 0ff045fa787e8..175651949ef85 100644 ---- a/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -+++ b/llvm-14.0.6.src/lib/CodeGen/ReplaceWithVeclib.cpp -@@ -157,7 +157,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, - // and the exact vector width of the call operands in the - // TargetLibraryInfo. - const std::string TLIName = -- std::string(TLI.getVectorizedFunction(ScalarName, VF)); -+ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); - - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" - << ScalarName << "` and vector width " << VF << ".\n"); -diff --git a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -index 179754e275b03..c4e95752c97e8 100644 ---- a/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -+++ b/llvm-14.0.6.src/lib/IR/AsmWriter.cpp -@@ -306,6 +306,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { - case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; - case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break; - case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; -+ case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break; -+ case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break; -+ case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break; - case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; - case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; - case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; -diff --git a/llvm-14.0.6.src/lib/IR/Verifier.cpp b/llvm-14.0.6.src/lib/IR/Verifier.cpp -index 989d01e2e3950..bae7382a36e13 100644 ---- a/llvm-14.0.6.src/lib/IR/Verifier.cpp -+++ b/llvm-14.0.6.src/lib/IR/Verifier.cpp -@@ -2457,6 +2457,9 @@ void Verifier::visitFunction(const Function &F) { - case CallingConv::Fast: - case CallingConv::Cold: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: - case CallingConv::PTX_Kernel: - case CallingConv::PTX_Device: - Assert(!F.isVarArg(), "Calling convention does not support varargs or " -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -index 4dd8a6cdd8982..12e65521215e4 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -+++ b/llvm-14.0.6.src/lib/Target/X86/X86CallingConv.td -@@ -498,6 +498,21 @@ def RetCC_X86_64 : CallingConv<[ - CCDelegateTo - ]>; - -+// Intel_SVML return-value convention. -+def RetCC_Intel_SVML : CallingConv<[ -+ // Vector types are returned in XMM0,XMM1 -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0,XMM1]>>, -+ -+ // 256-bit FP vectors -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0,YMM1]>>, -+ -+ // 512-bit FP vectors -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0,ZMM1]>> -+]>; -+ - // This is the return-value convention used for the entire X86 backend. 
- let Entry = 1 in - def RetCC_X86 : CallingConv<[ -@@ -505,6 +520,10 @@ def RetCC_X86 : CallingConv<[ - // Check if this is the Intel OpenCL built-ins calling convention - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, - -+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, -+ - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -1064,6 +1083,30 @@ def CC_Intel_OCL_BI : CallingConv<[ - CCDelegateTo - ]>; - -+// X86-64 Intel Short Vector Math Library calling convention. -+def CC_Intel_SVML : CallingConv<[ -+ -+ // The SSE vector arguments are passed in XMM registers. -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0, XMM1, XMM2]>>, -+ -+ // The 256-bit vector arguments are passed in YMM registers. -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0, YMM1, YMM2]>>, -+ -+ // The 512-bit vector arguments are passed in ZMM registers. -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>> -+]>; -+ -+def CC_X86_32_Intr : CallingConv<[ -+ CCAssignToStack<4, 4> -+]>; -+ -+def CC_X86_64_Intr : CallingConv<[ -+ CCAssignToStack<8, 8> -+]>; -+ - //===----------------------------------------------------------------------===// - // X86 Root Argument Calling Conventions - //===----------------------------------------------------------------------===// -@@ -1115,6 +1158,9 @@ def CC_X86_64 : CallingConv<[ - let Entry = 1 in - def CC_X86 : CallingConv<[ - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo>, - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -1227,3 +1273,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, - (sequence "R%u", 12, 15))>; - def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, - (sequence "XMM%u", 8, 15))>; -+ -+// SVML calling convention -+def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>; -+def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML, -+ K4, K5, K6, K7)>; -+ -+def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>; -+ -+def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 16, 31), -+ K4, K5, K6, K7)>; -+def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 6, 21), -+ K4, K5, K6, K7)>; -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -index 8bb7e81e19bbd..1780ce3fc6467 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm-14.0.6.src/lib/Target/X86/X86ISelLowering.cpp -@@ -3788,7 +3788,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { - // FIXME: Only some x86_32 calling conventions support AVX512. 
- if (Subtarget.useAVX512Regs() && - (is64Bit() || (CallConv == CallingConv::X86_VectorCall || -- CallConv == CallingConv::Intel_OCL_BI))) -+ CallConv == CallingConv::Intel_OCL_BI || -+ CallConv == CallingConv::Intel_SVML512))) - VecVT = MVT::v16f32; - else if (Subtarget.hasAVX()) - VecVT = MVT::v8f32; -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -index 130cb61cdde24..9eec3b25ca9f2 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -+++ b/llvm-14.0.6.src/lib/Target/X86/X86RegisterInfo.cpp -@@ -272,6 +272,42 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - } - } - -+namespace { -+std::pair getSVMLRegMaskAndSaveList( -+ bool Is64Bit, bool IsWin64, CallingConv::ID CC) { -+ assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512); -+ unsigned Abi = CC - CallingConv::Intel_SVML128 ; // 0 - 128, 1 - 256, 2 - 512 -+ -+ const std::pair Abi64[] = { -+ std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList), -+ std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList), -+ std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ const std::pair AbiWin64[] = { -+ std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList), -+ std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList), -+ std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ const std::pair Abi32[] = { -+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), -+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList), -+ std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList), -+ }; -+ -+ if (Is64Bit) { -+ if (IsWin64) { -+ return AbiWin64[Abi]; -+ } else { -+ return Abi64[Abi]; -+ } -+ } else { -+ return Abi32[Abi]; -+ } -+} -+} -+ - const MCPhysReg * - X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - assert(MF && "MachineFunction required"); -@@ -327,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR_64_Intel_OCL_BI_SaveList; - break; - } -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: { -+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second; -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_SaveList; - case CallingConv::X86_RegCall: -@@ -449,6 +490,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, - return CSR_64_Intel_OCL_BI_RegMask; - break; - } -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: { -+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first; -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_RegMask; - case CallingConv::X86_RegCall: -diff --git a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -index 5d773f0c57dfb..6bdf5bc6f3fe9 100644 ---- a/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -+++ b/llvm-14.0.6.src/lib/Target/X86/X86Subtarget.h -@@ -916,6 +916,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { - case CallingConv::X86_ThisCall: - case CallingConv::X86_VectorCall: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML128: -+ case CallingConv::Intel_SVML256: -+ case CallingConv::Intel_SVML512: - return isTargetWin64(); - // This convention allows using the Win64 convention 
on other targets. - case CallingConv::Win64: -diff --git a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -index 047bf5569ded3..59897785f156c 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Utils/InjectTLIMappings.cpp -@@ -92,7 +92,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { - - auto AddVariantDecl = [&](const ElementCount &VF) { - const std::string TLIName = -- std::string(TLI.getVectorizedFunction(ScalarName, VF)); -+ std::string(TLI.getVectorizedFunction(ScalarName, VF, CI.getFastMathFlags().isFast())); - if (!TLIName.empty()) { - std::string MangledName = - VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF); -diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -index 46ff0994e04e7..f472af5e1a835 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/LoopVectorize.cpp -@@ -712,6 +712,27 @@ class InnerLoopVectorizer { - virtual void printDebugTracesAtStart(){}; - virtual void printDebugTracesAtEnd(){}; - -+ /// Check legality of given SVML call instruction \p VecCall generated for -+ /// scalar call \p Call. If illegal then the appropriate legal instruction -+ /// is returned. -+ Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call); -+ -+ /// Returns the legal VF for a call instruction \p CI using TTI information -+ /// and vector type. -+ ElementCount getLegalVFForCall(CallInst *CI); -+ -+ /// Partially vectorize a given call \p Call by breaking it down into multiple -+ /// calls of \p LegalCall, decided by the variant VF \p LegalVF. -+ Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall, -+ unsigned LegalVF); -+ -+ /// Generate shufflevector instruction for a vector value \p V based on the -+ /// current \p Part and a smaller VF \p LegalVF. -+ Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part); -+ -+ /// Combine partially vectorized calls stored in \p CallResults. -+ Value *combinePartialVecCalls(SmallVectorImpl &CallResults); -+ - /// The original loop. 
- Loop *OrigLoop; - -@@ -4596,6 +4617,17 @@ static bool mayDivideByZero(Instruction &I) { - return !CInt || CInt->isZero(); - } - -+static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL, -+ const TargetLibraryInfo &TLI) { -+ Function *VectorF = CI.getCalledFunction(); -+ FunctionType *FTy = VectorF->getFunctionType(); -+ StringRef VFName = VectorF->getName(); -+ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL); -+ if (CC) { -+ CI.setCallingConv(*CC); -+ } -+} -+ - void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, - VPUser &ArgOperands, - VPTransformState &State) { -@@ -4664,9 +4696,246 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, - if (isa(V)) - V->copyFastMathFlags(CI); - -+ const DataLayout &DL = V->getModule()->getDataLayout(); -+ setVectorFunctionCallingConv(*V, DL, *TLI); -+ -+ // Perform legalization of SVML call instruction only if original call -+ // was not Intrinsic -+ if (!UseVectorIntrinsic && -+ (V->getCalledFunction()->getName()).startswith("__svml")) { -+ // assert((V->getCalledFunction()->getName()).startswith("__svml")); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump()); -+ auto *LegalV = cast(legalizeSVMLCall(V, CI)); -+ LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: "; -+ LegalV->dump()); -+ State.set(Def, LegalV, Part); -+ addMetadata(LegalV, &I); -+ } else { - State.set(Def, V, Part); - addMetadata(V, &I); -+ } -+ } -+} -+ -+//===----------------------------------------------------------------------===// -+// Implementation of functions for SVML vector call legalization. -+//===----------------------------------------------------------------------===// -+// -+// Unlike other VECLIBs, SVML needs to be used with target-legal -+// vector types. Otherwise, link failures and/or runtime failures -+// will occur. A motivating example could be - -+// -+// double *a; -+// float *b; -+// #pragma clang loop vectorize_width(8) -+// for(i = 0; i < N; ++i) { -+// a[i] = sin(i); // Legal SVML VF must be 4 or below on AVX -+// b[i] = cosf(i); // VF can be 8 on AVX since 8 floats can fit in YMM -+// } -+// -+// Current implementation of vector code generation in LV is -+// driven based on a single VF (in InnerLoopVectorizer::VF). This -+// inhibits the flexibility of adjusting/choosing different VF -+// for different instructions. -+// -+// Due to this limitation it is much more straightforward to -+// first generate the illegal sin8 (svml_sin8 for SVML vector -+// library) call and then legalize it than trying to avoid -+// generating illegal code from the beginning. -+// -+// A solution for this problem is to check legality of the -+// call instruction right after generating it in vectorizer and -+// if it is illegal we split the call arguments and issue multiple -+// calls to match the legal VF. This is demonstrated currently for -+// the SVML vector library calls (non-intrinsic version only). -+// -+// Future directions and extensions: -+// 1) This legalization example shows us that a good direction -+// for the VPlan framework would be to model the vector call -+// instructions in a way that legal VF for each call is chosen -+// correctly within vectorizer and illegal code generation is -+// avoided. -+// 2) This logic can also be extended to general vector functions -+// i.e. legalization OpenMP decalre simd functions. The -+// requirements needed for this will be documented soon. 
-+ -+Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall, -+ CallInst *Call) { -+ ElementCount LegalVF = getLegalVFForCall(VecCall); -+ -+ assert(LegalVF.getKnownMinValue() > 1 && -+ "Legal VF for SVML call must be greater than 1 to vectorize"); -+ -+ if (LegalVF == VF) -+ return VecCall; -+ else if (LegalVF.getKnownMinValue() > VF.getKnownMinValue()) -+ // TODO: handle case when we are underfilling vectors -+ return VecCall; -+ -+ // Legal VF for this SVML call is smaller than chosen VF, break it down into -+ // smaller call instructions -+ -+ // Convert args, types and return type to match legal VF -+ SmallVector NewTys; -+ SmallVector NewArgs; -+ -+ for (Value *ArgOperand : Call->args()) { -+ Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF); -+ NewTys.push_back(Ty); -+ NewArgs.push_back(UndefValue::get(Ty)); - } -+ -+ // Construct legal vector function -+ const VFShape Shape = -+ VFShape::get(*Call, LegalVF /*EC*/, false /*HasGlobalPred*/); -+ Function *LegalVectorF = VFDatabase(*Call).getVectorizedFunction(Shape); -+ assert(LegalVectorF != nullptr && "Can't create legal vector function."); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump()); -+ -+ SmallVector OpBundles; -+ Call->getOperandBundlesAsDefs(OpBundles); -+ auto LegalV = std::unique_ptr(CallInst::Create(LegalVectorF, NewArgs, OpBundles)); -+ -+ if (isa(LegalV)) -+ LegalV->copyFastMathFlags(Call); -+ -+ const DataLayout &DL = VecCall->getModule()->getDataLayout(); -+ // Set SVML calling conventions -+ setVectorFunctionCallingConv(*LegalV, DL, *TLI); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump()); -+ -+ Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV.get(), LegalVF.getKnownMinValue()); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump()); -+ -+ // Remove the illegal call from Builder -+ VecCall->eraseFromParent(); -+ -+ return LegalizedCall; -+} -+ -+ElementCount InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) { -+ const DataLayout DL = CI->getModule()->getDataLayout(); -+ FunctionType *CallFT = CI->getFunctionType(); -+ // All functions that need legalization should have a vector return type. -+ // This is true for all SVML functions that are currently supported. -+ assert(isa(CallFT->getReturnType()) && -+ "Return type of call that needs legalization is not a vector."); -+ auto *VecCallRetType = cast(CallFT->getReturnType()); -+ Type *ElemType = VecCallRetType->getElementType(); -+ -+ unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType); -+ unsigned VectorBitWidth = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); -+ unsigned LegalVF = VectorBitWidth / TypeBitWidth; -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth -+ << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n"); -+ -+ return ElementCount::getFixed(LegalVF); -+} -+ -+// Partial vectorization of a call instruction is achieved by making clones of -+// \p LegalCall and overwriting its argument operands with shufflevector -+// equivalent decided based on \p LegalVF and current Part being filled. 
-+Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call, -+ CallInst *LegalCall, -+ unsigned LegalVF) { -+ unsigned NumParts = VF.getKnownMinValue() / LegalVF; -+ LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n"); -+ SmallVector CallResults; -+ -+ for (unsigned Part = 0; Part < NumParts; ++Part) { -+ auto *ClonedCall = cast(LegalCall->clone()); -+ -+ // Update the arg operand of cloned call to shufflevector -+ for (unsigned i = 0, ie = Call->arg_size(); i != ie; ++i) { -+ auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part); -+ ClonedCall->setArgOperand(i, NewOp); -+ } -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump()); -+ -+ auto *PartialVecCall = Builder.Insert(ClonedCall); -+ CallResults.push_back(PartialVecCall); -+ } -+ -+ return combinePartialVecCalls(CallResults); -+} -+ -+Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF, -+ unsigned Part) { -+ // Example: -+ // Consider the following vector code - -+ // %1 = sitofp <4 x i32> %0 to <4 x double> -+ // %2 = call <4 x double> @__svml_sin4(<4 x double> %1) -+ // -+ // If the LegalVF is 2, we partially vectorize the sin4 call by invoking -+ // generateShuffleValue on the operand %1 -+ // If Part = 1, output value is - -+ // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ // and if Part = 2, output is - -+ // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ -+ assert(isa(V->getType()) && -+ "Cannot generate shuffles for non-vector values."); -+ SmallVector ShuffleMask; -+ Value *Undef = UndefValue::get(V->getType()); -+ -+ unsigned ElemIdx = Part * LegalVF; -+ -+ for (unsigned K = 0; K < LegalVF; K++) -+ ShuffleMask.push_back(static_cast(ElemIdx + K)); -+ -+ auto *ShuffleInst = -+ Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle"); -+ -+ return ShuffleInst; -+} -+ -+// Results of the calls executed by smaller legal call instructions must be -+// combined to match the original VF for later use. This is done by constructing -+// shufflevector instructions in a cumulative fashion. 
-+Value *InnerLoopVectorizer::combinePartialVecCalls( -+ SmallVectorImpl &CallResults) { -+ assert(isa(CallResults[0]->getType()) && -+ "Cannot combine calls with non-vector results."); -+ auto *CallType = cast(CallResults[0]->getType()); -+ -+ Value *CombinedShuffle; -+ unsigned NumElems = CallType->getElementCount().getKnownMinValue() * 2; -+ unsigned NumRegs = CallResults.size(); -+ -+ assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) && -+ "Number of partial vector calls to combine must be a power of 2 " -+ "(atleast 2^1)"); -+ -+ while (NumRegs > 1) { -+ for (unsigned I = 0; I < NumRegs; I += 2) { -+ SmallVector ShuffleMask; -+ for (unsigned J = 0; J < NumElems; J++) -+ ShuffleMask.push_back(static_cast(J)); -+ -+ CombinedShuffle = Builder.CreateShuffleVector( -+ CallResults[I], CallResults[I + 1], ShuffleMask, "combined"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:"; -+ CombinedShuffle->dump()); -+ CallResults.push_back(CombinedShuffle); -+ } -+ -+ SmallVector::iterator Start = CallResults.begin(); -+ SmallVector::iterator End = Start + NumRegs; -+ CallResults.erase(Start, End); -+ -+ NumElems *= 2; -+ NumRegs /= 2; -+ } -+ -+ return CombinedShuffle; - } - - void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { -diff --git a/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -index 644372483edde..342f018b92184 100644 ---- a/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -+++ b/llvm-14.0.6.src/lib/Transforms/Vectorize/SLPVectorizer.cpp -@@ -6322,6 +6322,17 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - return Vec; - } - -+static void setVectorFunctionCallingConv(CallInst &CI, const DataLayout &DL, -+ const TargetLibraryInfo &TLI) { -+ Function *VectorF = CI.getCalledFunction(); -+ FunctionType *FTy = VectorF->getFunctionType(); -+ StringRef VFName = VectorF->getName(); -+ auto CC = TLI.getVectorizedFunctionCallingConv(VFName, *FTy, DL); -+ if (CC) { -+ CI.setCallingConv(*CC); -+ } -+} -+ - Value *BoUpSLP::vectorizeTree(TreeEntry *E) { - IRBuilder<>::InsertPointGuard Guard(Builder); - -@@ -6794,7 +6805,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { - - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); -- Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); -+ -+ CallInst *NewCall = Builder.CreateCall(CF, OpVecs, OpBundles); -+ const DataLayout &DL = NewCall->getModule()->getDataLayout(); -+ setVectorFunctionCallingConv(*NewCall, DL, *TLI); -+ -+ Value *V = NewCall; - - // The scalar argument uses an in-tree scalar so we add the new vectorized - // call to ExternalUses list to make sure that an extract will be -diff --git a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -index df8b7c498bd00..63a36549f18fd 100644 ---- a/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -+++ b/llvm-14.0.6.src/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll -@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" - define <4 x double> @exp_v4(<4 x double> %in) { - ; SVML-LABEL: define {{[^@]+}}@exp_v4 - ; SVML-SAME: (<4 x double> [[IN:%.*]]) { --; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]]) -+; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4_ha(<4 x double> [[IN]]) - ; SVML-NEXT: ret <4 x double> [[TMP1]] - ; - ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4 -@@ -37,7 +37,7 @@ declare <4 x double> 
@llvm.exp.v4f64(<4 x double>) #0 - define <4 x float> @exp_f32(<4 x float> %in) { - ; SVML-LABEL: define {{[^@]+}}@exp_f32 - ; SVML-SAME: (<4 x float> [[IN:%.*]]) { --; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]]) -+; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4_ha(<4 x float> [[IN]]) - ; SVML-NEXT: ret <4 x float> [[TMP1]] - ; - ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32 -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -index a6e191c3d6923..d6e2e11106949 100644 ---- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -@@ -39,7 +39,8 @@ for.end: ; preds = %for.body - declare double @__exp_finite(double) #0 - - ; CHECK-LABEL: @exp_f64 --; CHECK: <4 x double> @__svml_exp4 -+; CHECK: <2 x double> @__svml_exp2 -+; CHECK: <2 x double> @__svml_exp2 - ; CHECK: ret - define void @exp_f64(double* nocapture %varray) { - entry: -@@ -99,7 +100,8 @@ for.end: ; preds = %for.body - declare double @__log_finite(double) #0 - - ; CHECK-LABEL: @log_f64 --; CHECK: <4 x double> @__svml_log4 -+; CHECK: <2 x double> @__svml_log2 -+; CHECK: <2 x double> @__svml_log2 - ; CHECK: ret - define void @log_f64(double* nocapture %varray) { - entry: -@@ -159,7 +161,8 @@ for.end: ; preds = %for.body - declare double @__pow_finite(double, double) #0 - - ; CHECK-LABEL: @pow_f64 --; CHECK: <4 x double> @__svml_pow4 -+; CHECK: <2 x double> @__svml_pow2 -+; CHECK: <2 x double> @__svml_pow2 - ; CHECK: ret - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - entry: -@@ -190,7 +193,8 @@ declare float @__exp2f_finite(float) #0 - - define void @exp2f_finite(float* nocapture %varray) { - ; CHECK-LABEL: @exp2f_finite( --; CHECK: call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}}) -+; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) -+; CHECK: call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> %{{.*}}) - ; CHECK: ret void - ; - entry: -@@ -219,7 +223,8 @@ declare double @__exp2_finite(double) #0 - - define void @exp2_finite(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_finite( --; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) -+; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) -+; CHECK: call intel_svmlcc128 <2 x double> @__svml_exp22_ha(<2 x double> {{.*}}) - ; CHECK: ret void - ; - entry: -@@ -276,7 +281,8 @@ for.end: ; preds = %for.body - declare double @__log2_finite(double) #0 - - ; CHECK-LABEL: @log2_f64 --; CHECK: <4 x double> @__svml_log24 -+; CHECK: <2 x double> @__svml_log22 -+; CHECK: <2 x double> @__svml_log22 - ; CHECK: ret - define void @log2_f64(double* nocapture %varray) { - entry: -@@ -333,7 +339,8 @@ for.end: ; preds = %for.body - declare double @__log10_finite(double) #0 - - ; CHECK-LABEL: @log10_f64 --; CHECK: <4 x double> @__svml_log104 -+; CHECK: <2 x double> @__svml_log102 -+; CHECK: <2 x double> @__svml_log102 - ; CHECK: ret - define void @log10_f64(double* nocapture %varray) { - entry: -@@ -390,7 +397,8 @@ for.end: ; preds = %for.body - declare double @__sqrt_finite(double) #0 - - ; CHECK-LABEL: @sqrt_f64 --; CHECK: <4 x double> @__svml_sqrt4 -+; CHECK: <2 x double> @__svml_sqrt2 -+; CHECK: <2 x double> @__svml_sqrt2 - ; CHECK: ret - define void @sqrt_f64(double* nocapture %varray) { - entry: -diff --git 
a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -index 42c280df6ad02..088bbdcf1aa4a 100644 ---- a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-calls.ll -@@ -48,7 +48,7 @@ declare float @llvm.exp2.f32(float) #0 - - define void @sin_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -71,7 +71,7 @@ for.end: - - define void @sin_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -94,7 +94,7 @@ for.end: - - define void @sin_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -117,7 +117,7 @@ for.end: - - define void @sin_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -140,7 +140,7 @@ for.end: - - define void @cos_f64(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -163,7 +163,7 @@ for.end: - - define void @cos_f32(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -186,7 +186,7 @@ for.end: - - define void @cos_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -209,7 +209,7 @@ for.end: - - define void @cos_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -232,7 +232,7 @@ for.end: - - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -257,7 +257,7 @@ for.end: - - define void 
@pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -282,7 +282,7 @@ for.end: - - define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -307,7 +307,7 @@ for.end: - - define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc128 <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -332,7 +332,7 @@ for.end: - - define void @exp_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -355,7 +355,7 @@ for.end: - - define void @exp_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -378,7 +378,7 @@ for.end: - - define void @exp_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -401,7 +401,7 @@ for.end: - - define void @exp_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -424,7 +424,7 @@ for.end: - - define void @log_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -447,7 +447,7 @@ for.end: - - define void @log_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -470,7 +470,7 @@ for.end: - - define void @log_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x 
double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -493,7 +493,7 @@ for.end: - - define void @log_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -516,7 +516,7 @@ for.end: - - define void @log2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log2_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -539,7 +539,7 @@ for.end: - - define void @log2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log2_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -562,7 +562,7 @@ for.end: - - define void @log2_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log2_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -585,7 +585,7 @@ for.end: - - define void @log2_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log2_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -608,7 +608,7 @@ for.end: - - define void @log10_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log10_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -631,7 +631,7 @@ for.end: - - define void @log10_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log10_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -654,7 +654,7 @@ for.end: - - define void @log10_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log10_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log104_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -677,7 +677,7 @@ for.end: - - define void @log10_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log10_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_log10f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -700,7 +700,7 @@ for.end: - - define void @sqrt_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sqrt4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -723,7 +723,7 @@ 
for.end: - - define void @sqrt_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sqrt_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_sqrtf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -746,7 +746,7 @@ for.end: - - define void @exp2_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -769,7 +769,7 @@ for.end: - - define void @exp2_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp2_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -792,7 +792,7 @@ for.end: - - define void @exp2_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @exp2_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp24_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -815,7 +815,7 @@ for.end: - - define void @exp2_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @exp2_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc128 <4 x float> @__svml_exp2f4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -836,4 +836,44 @@ for.end: - ret void - } - -+; CHECK-LABEL: @atan2_finite -+; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -+; CHECK: intel_svmlcc256 <4 x double> @__svml_atan24( -+; CHECK: ret -+ -+declare double @__atan2_finite(double, double) local_unnamed_addr #0 -+ -+define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { -+entry: -+ br label %for.cond1.preheader -+ -+for.cond1.preheader: ; preds = %for.inc7, %entry -+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] -+ %0 = trunc i64 %indvars.iv19 to i32 -+ %conv = sitofp i32 %0 to double -+ br label %for.body3 -+ -+for.body3: ; preds = %for.body3, %for.cond1.preheader -+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %1 = trunc i64 %indvars.iv.next to i32 -+ %conv4 = sitofp i32 %1 to double -+ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) -+ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv -+ store double %call, double* %arrayidx6, align 8 -+ %exitcond = icmp eq i64 %indvars.iv.next, 100 -+ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 -+ -+for.inc7: ; preds = %for.body3 -+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 -+ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 -+ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader -+ -+for.end9: ; preds = %for.inc7 -+ ret void -+} -+ - attributes #0 = { nounwind readnone } -+!5 = distinct !{!5, !6, !7} -+!6 = !{!"llvm.loop.vectorize.width", i32 8} -+!7 = !{!"llvm.loop.vectorize.enable", i1 true} -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -new 
file mode 100644 -index 0000000000000..326c763994343 ---- /dev/null -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -@@ -0,0 +1,513 @@ -+; Check legalization of SVML calls, including intrinsic versions (like @llvm..). -+ -+; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s -+ -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+declare double @sin(double) #0 -+declare float @sinf(float) #0 -+declare double @llvm.sin.f64(double) #0 -+declare float @llvm.sin.f32(float) #0 -+ -+declare double @cos(double) #0 -+declare float @cosf(float) #0 -+declare double @llvm.cos.f64(double) #0 -+declare float @llvm.cos.f32(float) #0 -+ -+declare double @pow(double, double) #0 -+declare float @powf(float, float) #0 -+declare double @llvm.pow.f64(double, double) #0 -+declare float @llvm.pow.f32(float, float) #0 -+ -+declare double @exp(double) #0 -+declare float @expf(float) #0 -+declare double @llvm.exp.f64(double) #0 -+declare float @llvm.exp.f32(float) #0 -+ -+declare double @log(double) #0 -+declare float @logf(float) #0 -+declare double @llvm.log.f64(double) #0 -+declare float @llvm.log.f32(float) #0 -+ -+ -+define void @sin_f64(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @sin(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @sinf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.sin.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 
%iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.sin.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f64(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @cos(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @cosf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.cos.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp 
i32 %tmp to float -+ %call = tail call float @llvm.cos.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -+; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @pow(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -+; CHECK: [[TMP4:%.*]] = call intel_svmlcc256 <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @powf(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_powf8_ha(<8 x 
float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @exp(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @expf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.exp.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.exp.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* 
%varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @log(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @logf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc256 <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.log.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc256 <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.log.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+attributes #0 = { nounwind readnone } -+ -diff --git a/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll -new file mode 100644 -index 
0000000000000..9422653445dc2 ---- /dev/null -+++ b/llvm-14.0.6.src/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll -@@ -0,0 +1,61 @@ -+; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype. -+; The C code used to generate this test: -+ -+; #include -+; -+; void foo(double *a, int N){ -+; int i; -+; #pragma clang loop vectorize_width(8) -+; for (i=0;i [[I0:%.*]] to <8 x double> -+; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> -+; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S1]]) -+; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> -+; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc256 <4 x double> @__svml_sin4(<4 x double> [[S2]]) -+; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> -+; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8 -+ -+ -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+; Function Attrs: nounwind uwtable -+define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 { -+entry: -+ %cmp5 = icmp sgt i32 %N, 0 -+ br i1 %cmp5, label %for.body.preheader, label %for.end -+ -+for.body.preheader: ; preds = %entry -+ %wide.trip.count = zext i32 %N to i64 -+ br label %for.body -+ -+for.body: ; preds = %for.body, %for.body.preheader -+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] -+ %0 = trunc i64 %indvars.iv to i32 -+ %conv = sitofp i32 %0 to double -+ %call = tail call fast double @sin(double %conv) #2 -+ %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv -+ store double %call, double* %arrayidx, align 8, !tbaa !2 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count -+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6 -+ -+for.end: ; preds = %for.body, %entry -+ ret void -+} -+ -+; Function Attrs: nounwind -+declare dso_local double @sin(double) local_unnamed_addr #1 -+ -+!2 = !{!3, !3, i64 0} -+!3 = !{!"double", !4, i64 0} -+!4 = !{!"omnipotent char", !5, i64 0} -+!5 = !{!"Simple C/C++ TBAA"} -+!6 = distinct !{!6, !7} -+!7 = !{!"llvm.loop.vectorize.width", i32 8} -diff --git a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -index e8c83c4d9bd1f..615fdc29176a2 100644 ---- a/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -+++ b/llvm-14.0.6.src/test/Transforms/Util/add-TLI-mappings.ll -@@ -12,12 +12,12 @@ target triple = "x86_64-unknown-linux-gnu" - - ; COMMON-LABEL: @llvm.compiler.used = appending global - ; SVML-SAME: [6 x i8*] [ --; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*), --; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*), --; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*), --; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*), --; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*), --; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*) -+; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2_ha to i8*), -+; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4_ha to i8*), -+; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* 
@__svml_sin8_ha to i8*), -+; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4_ha to i8*), -+; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8_ha to i8*), -+; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16_ha to i8*) - ; MASSV-SAME: [2 x i8*] [ - ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*), - ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*) -@@ -59,9 +59,9 @@ declare float @llvm.log10.f32(float) #0 - attributes #0 = { nounwind readnone } - - ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"= --; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2), --; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4), --; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" } -+; SVML-SAME: "_ZGV_LLVM_N2v_sin(__svml_sin2_ha), -+; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4_ha), -+; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8_ha)" } - - ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"= - ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" } -diff --git a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -index 97df6a55d1b59..199e0285c9e5d 100644 ---- a/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -+++ b/llvm-14.0.6.src/utils/TableGen/CMakeLists.txt -@@ -47,6 +47,7 @@ add_tablegen(llvm-tblgen LLVM - SearchableTableEmitter.cpp - SubtargetEmitter.cpp - SubtargetFeatureInfo.cpp -+ SVMLEmitter.cpp - TableGen.cpp - Types.cpp - X86DisassemblerTables.cpp -diff --git a/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp -new file mode 100644 -index 0000000000000..a5aeea48db28b ---- /dev/null -+++ b/llvm-14.0.6.src/utils/TableGen/SVMLEmitter.cpp -@@ -0,0 +1,110 @@ -+//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This tablegen backend emits the scalar to svml function map for TLI. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "CodeGenTarget.h" -+#include "llvm/Support/Format.h" -+#include "llvm/TableGen/Error.h" -+#include "llvm/TableGen/Record.h" -+#include "llvm/TableGen/TableGenBackend.h" -+#include -+#include -+ -+using namespace llvm; -+ -+#define DEBUG_TYPE "SVMLVariants" -+#include "llvm/Support/Debug.h" -+ -+namespace { -+ -+class SVMLVariantsEmitter { -+ -+ RecordKeeper &Records; -+ -+private: -+ void emitSVMLVariants(raw_ostream &OS); -+ -+public: -+ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {} -+ -+ void run(raw_ostream &OS); -+}; -+} // End anonymous namespace -+ -+/// \brief Emit the set of SVML variant function names. -+// The default is to emit the high accuracy SVML variants until a mechanism is -+// introduced to allow a selection of different variants through precision -+// requirements specified by the user. This code generates mappings to svml -+// that are in the scalar form of llvm intrinsics, math library calls, or the -+// finite variants of math library calls. 
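[Editorial aside, not part of the patch: to make the emitter below easier to follow, this is the shape of the fragment it would generate for a hypothetical `SvmlVariant` record named `sin`, single-precision width 4 only, derived directly from the stream writes in `emitSVMLVariants`. The fragment is meant to be textually included by TLI inside a table initializer guarded by `GET_SVML_VARIANTS`, not compiled on its own.]

```cpp
// Hypothetical emitter output for a record named "sin" (VL = 4 only):
#ifdef GET_SVML_VARIANTS
{"sinf", "__svml_sinf4", ElementCount::getFixed(4)},          // scalar libm call
{"llvm.sin.f32", "__svml_sinf4", ElementCount::getFixed(4)},  // scalar intrinsic
{"__sinf_finite", "__svml_sinf4", ElementCount::getFixed(4)}, // finite libm call
#endif // GET_SVML_VARIANTS
```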
-+void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) { -+ -+ const unsigned MinSinglePrecVL = 4; -+ const unsigned MaxSinglePrecVL = 16; -+ const unsigned MinDoublePrecVL = 2; -+ const unsigned MaxDoublePrecVL = 8; -+ -+ OS << "#ifdef GET_SVML_VARIANTS\n"; -+ -+ for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) { -+ StringRef SvmlVariantNameStr = D->getName(); -+ // Single Precision SVML -+ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "f" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ -+ // Emit the finite math library function to svml function entry. -+ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ } -+ -+ // Double Precision SVML -+ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL -+ << ")},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << "ElementCount::getFixed(" << VL -+ << ")},\n"; -+ -+ // Emit the finite math library function to svml function entry. 
-+ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " -+ << "ElementCount::getFixed(" << VL << ")},\n"; -+ } -+ } -+ -+ OS << "#endif // GET_SVML_VARIANTS\n\n"; -+} -+ -+void SVMLVariantsEmitter::run(raw_ostream &OS) { -+ emitSVMLVariants(OS); -+} -+ -+namespace llvm { -+ -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) { -+ SVMLVariantsEmitter(RK).run(OS); -+} -+ -+} // End llvm namespace -diff --git a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -index 2d4a45f889be6..603d0c223b33a 100644 ---- a/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -+++ b/llvm-14.0.6.src/utils/TableGen/TableGen.cpp -@@ -57,6 +57,7 @@ enum ActionType { - GenAutomata, - GenDirectivesEnumDecl, - GenDirectivesEnumImpl, -+ GenSVMLVariants, - }; - - namespace llvm { -@@ -138,7 +139,9 @@ cl::opt Action( - clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", - "Generate directive related declaration code (header file)"), - clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", -- "Generate directive related implementation code"))); -+ "Generate directive related implementation code"), -+ clEnumValN(GenSVMLVariants, "gen-svml", -+ "Generate SVML variant function names"))); - - cl::OptionCategory PrintEnumsCat("Options for -print-enums"); - cl::opt Class("class", cl::desc("Print Enum list for this class"), -@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { - case GenDirectivesEnumImpl: - EmitDirectivesImpl(Records, OS); - break; -+ case GenSVMLVariants: -+ EmitSVMLVariants(Records, OS); -+ break; - } - - return false; -diff --git a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -index 71db8dc77b052..86c3a3068c2dc 100644 ---- a/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -+++ b/llvm-14.0.6.src/utils/TableGen/TableGenBackends.h -@@ -93,6 +93,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); - void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); - void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); - void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS); - - } // End llvm namespace - -diff --git a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -index 205db16b7d8cd..2572ab5a59e1b 100644 ---- a/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -+++ b/llvm-14.0.6.src/utils/vim/syntax/llvm.vim -@@ -104,6 +104,7 @@ syn keyword llvmKeyword - \ inreg - \ intel_ocl_bicc - \ inteldialect -+ \ intel_svmlcc - \ internal - \ jumptable - \ linkonce diff --git a/conda-recipes/llvmdev/bld.bat b/conda-recipes/llvmdev/bld.bat index 4d16d84b1..ec84d9897 100644 --- a/conda-recipes/llvmdev/bld.bat +++ b/conda-recipes/llvmdev/bld.bat @@ -1,94 +1,59 @@ -setlocal EnableDelayedExpansion -FOR /D %%d IN (llvm-*.src) DO (MKLINK /J llvm %%d -if !errorlevel! neq 0 exit /b %errorlevel%) -FOR /D %%d IN (lld-*.src) DO (MKLINK /J lld %%d -if !errorlevel! neq 0 exit /b %errorlevel%) -FOR /D %%d IN (unwind\libunwind-*.src) DO (MKLINK /J libunwind %%d -if !errorlevel! 
neq 0 exit /b %errorlevel%) - -DIR +REM base on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/bld.bat +echo on mkdir build cd build -set BUILD_CONFIG=Release - -REM === Configure step === - -REM allow setting the targets to build as an environment variable -if "%LLVM_TARGETS_TO_BUILD%"=="" ( - set "LLVM_TARGETS_TO_BUILD=all" -) -if "%ARCH%"=="32" ( - set "ARCH_POSTFIX=" - set "GEN_ARCH=Win32" -) else ( - set "ARCH_POSTFIX= Win64" - set "GEN_ARCH=x64" -) - -REM The platform toolset host arch is set to x64 so as to use the 64bit linker, -REM the 32bit linker heap is too small for llvm8 so it tries and falls over to -REM the 64bit linker anyway. This must be passed in to certain generators as -REM '-Thost x64'. -set PreferredToolArchitecture=x64 - -set MAX_INDEX_CMAKE_GENERATOR=0 - -set "CMAKE_GENERATOR[0]=Visual Studio 16 2019" +REM remove GL flag for now +set "CXXFLAGS=-MD" +set "CC=cl.exe" +set "CXX=cl.exe" -set "CMAKE_GENERATOR_ARCHITECTURE[0]=%GEN_ARCH%" - -set "CMAKE_GENERATOR_TOOLSET[0]=v142" - -REM Reduce build times and package size by removing unused stuff -REM BENCHMARKS (new for llvm8) don't build under Visual Studio 14 2015 -set CMAKE_CUSTOM=-DLLVM_TARGETS_TO_BUILD="%LLVM_TARGETS_TO_BUILD%" ^ - -DLLVM_ENABLE_PROJECTS:STRING=lld ^ - -DLLVM_ENABLE_ZLIB=OFF ^ - -DLLVM_INCLUDE_UTILS=ON ^ +cmake -G "Ninja" ^ + -DCMAKE_BUILD_TYPE="Release" ^ + -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX% ^ + -DCMAKE_INSTALL_PREFIX:PATH=%LIBRARY_PREFIX% ^ + -DLLVM_USE_INTEL_JITEVENTS=ON ^ + -DLLVM_ENABLE_LIBXML2=FORCE_ON ^ + -DLLVM_ENABLE_RTTI=ON ^ + -DLLVM_ENABLE_ZLIB=FORCE_ON ^ + -DLLVM_ENABLE_ZSTD=FORCE_ON ^ + -DLLVM_INCLUDE_BENCHMARKS=OFF ^ -DLLVM_INCLUDE_DOCS=OFF ^ -DLLVM_INCLUDE_EXAMPLES=OFF ^ + -DLLVM_INCLUDE_TESTS=ON ^ + -DLLVM_INCLUDE_UTILS=ON ^ + -DLLVM_INSTALL_UTILS=ON ^ + -DLLVM_UTILS_INSTALL_DIR=libexec\llvm ^ + -DLLVM_BUILD_LLVM_C_DYLIB=no ^ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly ^ + -DCMAKE_POLICY_DEFAULT_CMP0111=NEW ^ + -DLLVM_ENABLE_PROJECTS:STRING=lld;compiler-rt ^ -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_USE_INTEL_JITEVENTS=ON ^ - -DLLVM_INCLUDE_BENCHMARKS=OFF ^ -DLLVM_ENABLE_DIA_SDK=OFF ^ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly - -REM try all compatible visual studio toolsets to find one that is installed -setlocal enabledelayedexpansion -for /l %%n in (0,1,%MAX_INDEX_CMAKE_GENERATOR%) do ( - cmake -G "!CMAKE_GENERATOR[%%n]!" ^ - -A "!CMAKE_GENERATOR_ARCHITECTURE[%%n]!" ^ - -T "!CMAKE_GENERATOR_TOOLSET[%%n]!" ^ - -DCMAKE_BUILD_TYPE="%BUILD_CONFIG%" ^ - -DCMAKE_PREFIX_PATH="%LIBRARY_PREFIX%" ^ - -DCMAKE_INSTALL_PREFIX:PATH="%LIBRARY_PREFIX%" ^ - %CMAKE_CUSTOM% "%SRC_DIR%\llvm" - if not errorlevel 1 goto configuration_successful - del CMakeCache.txt -) - -REM no compatible visual studio toolset was found -if errorlevel 1 exit 1 - -:configuration_successful -endlocal - -REM === Build step === -cmake --build . --config "%BUILD_CONFIG%" -if errorlevel 1 exit 1 - -REM === Install step === -cmake --build . --config "%BUILD_CONFIG%" --target install -if errorlevel 1 exit 1 - -REM From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 -"%BUILD_CONFIG%\bin\opt" -S -vector-library=SVML -mcpu=haswell -O3 "%RECIPE_DIR%\numba-3016.ll" | "%BUILD_CONFIG%\bin\FileCheck" "%RECIPE_DIR%\numba-3016.ll" -if errorlevel 1 exit 1 - -REM This is technically how to run the suite, but it will only run in an -REM enhanced unix-like shell which has functions like `grep` available. 
-REM cd ..\test -REM "%PYTHON%" "..\build\%BUILD_CONFIG%\bin\llvm-lit.py" -vv Transforms ExecutionEngine Analysis CodeGen/X86 -REM if errorlevel 1 exit 1 + -DCOMPILER_RT_BUILD_BUILTINS=ON ^ + -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF ^ + -DCOMPILER_RT_BUILD_LIBFUZZER=OFF ^ + -DCOMPILER_RT_BUILD_CRT=OFF ^ + -DCOMPILER_RT_BUILD_MEMPROF=OFF ^ + -DCOMPILER_RT_BUILD_PROFILE=OFF ^ + -DCOMPILER_RT_BUILD_SANITIZERS=OFF ^ + -DCOMPILER_RT_BUILD_XRAY=OFF ^ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF ^ + -DCOMPILER_RT_BUILD_ORC=OFF ^ + -DCOMPILER_RT_INCLUDE_TESTS=OFF ^ + %SRC_DIR%/llvm +if %ERRORLEVEL% neq 0 exit 1 + +cmake --build . +if %ERRORLEVEL% neq 0 exit 1 + +cmake --build . --target install + +if %ERRORLEVEL% neq 0 exit 1 + +REM bin\opt -S -vector-library=SVML -mcpu=haswell -O3 %RECIPE_DIR%\numba-3016.ll | bin\FileCheck %RECIPE_DIR%\numba-3016.ll +REM if %ERRORLEVEL% neq 0 exit 1 + +cd ..\llvm\test +python ..\..\build\bin\llvm-lit.py -vv Transforms ExecutionEngine Analysis CodeGen/X86 diff --git a/conda-recipes/llvmdev/build.sh b/conda-recipes/llvmdev/build.sh index caccfe127..e1adebd59 100644 --- a/conda-recipes/llvmdev/build.sh +++ b/conda-recipes/llvmdev/build.sh @@ -4,97 +4,111 @@ set -x -# allow setting the targets to build as an environment variable -LLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD:-"all"} +# Make osx work like linux. +sed -i.bak "s/NOT APPLE AND ARG_SONAME/ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake +sed -i.bak "s/NOT APPLE AND NOT ARG_SONAME/NOT ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake -# This is the clang compiler prefix -if [[ $build_platform == osx-arm64 ]]; then - DARWIN_TARGET=arm64-apple-darwin20.0.0 -else - DARWIN_TARGET=x86_64-apple-darwin13.4.0 -fi +mkdir build +cd build + +export CPU_COUNT=4 -mv llvm-*.src llvm -mv lld-*.src lld -mv unwind/libunwind-*.src libunwind - -declare -a _cmake_config -_cmake_config+=(-DCMAKE_INSTALL_PREFIX:PATH=${PREFIX}) -_cmake_config+=(-DCMAKE_BUILD_TYPE:STRING=Release) -_cmake_config+=(-DLLVM_ENABLE_PROJECTS:STRING="lld") -# The bootstrap clang I use was built with a static libLLVMObject.a and I trying to get the same here -# _cmake_config+=(-DBUILD_SHARED_LIBS:BOOL=ON) -_cmake_config+=(-DLLVM_ENABLE_ASSERTIONS:BOOL=ON) -_cmake_config+=(-DLINK_POLLY_INTO_TOOLS:BOOL=ON) -# Don't really require libxml2. Turn it off explicitly to avoid accidentally linking to system libs -_cmake_config+=(-DLLVM_ENABLE_LIBXML2:BOOL=OFF) -# Urgh, llvm *really* wants to link to ncurses / terminfo and we *really* do not want it to. -_cmake_config+=(-DHAVE_TERMINFO_CURSES=OFF) -_cmake_config+=(-DLLVM_ENABLE_TERMINFO=OFF) -# Sometimes these are reported as unused. Whatever. -_cmake_config+=(-DHAVE_TERMINFO_NCURSES=OFF) -_cmake_config+=(-DHAVE_TERMINFO_NCURSESW=OFF) -_cmake_config+=(-DHAVE_TERMINFO_TERMINFO=OFF) -_cmake_config+=(-DHAVE_TERMINFO_TINFO=OFF) -_cmake_config+=(-DHAVE_TERMIOS_H=OFF) -_cmake_config+=(-DCLANG_ENABLE_LIBXML=OFF) -_cmake_config+=(-DLIBOMP_INSTALL_ALIASES=OFF) -_cmake_config+=(-DLLVM_ENABLE_RTTI=OFF) -_cmake_config+=(-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}) -_cmake_config+=(-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly) -_cmake_config+=(-DLLVM_INCLUDE_UTILS=ON) # for llvm-lit -_cmake_config+=(-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF) # doesn't build without the rest of LLVM project -# TODO :: It would be nice if we had a cross-ecosystem 'BUILD_TIME_LIMITED' env var we could use to -# disable these unnecessary but useful things. 
-if [[ ${CONDA_FORGE} == yes ]]; then - _cmake_config+=(-DLLVM_INCLUDE_DOCS=OFF) - _cmake_config+=(-DLLVM_INCLUDE_EXAMPLES=OFF) +CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" + +if [[ "$target_platform" == "linux-64" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_USE_INTEL_JITEVENTS=ON" fi -# Only valid when using the Ninja Generator AFAICT -# _cmake_config+=(-DLLVM_PARALLEL_LINK_JOBS:STRING=1) -# What about cross-compiling targetting Darwin here? Are any of these needed? -if [[ $(uname) == Darwin ]]; then - _cmake_config+=(-DCMAKE_OSX_SYSROOT=${SYSROOT_DIR}) - _cmake_config+=(-DDARWIN_macosx_CACHED_SYSROOT=${SYSROOT_DIR}) - _cmake_config+=(-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET}) - _cmake_config+=(-DCMAKE_LIBTOOL=$(which ${DARWIN_TARGET}-libtool)) - _cmake_config+=(-DLD64_EXECUTABLE=$(which ${DARWIN_TARGET}-ld)) - _cmake_config+=(-DCMAKE_INSTALL_NAME_TOOL=$(which ${DARWIN_TARGET}-install_name_tool)) - # Once we are using our libc++ (not until llvm_build_final), it will be single-arch only and not setting - # this causes link failures building the santizers since they respect DARWIN_osx_ARCHS. We may as well - # save some compilation time by setting this for all of our llvm builds. - _cmake_config+=(-DDARWIN_osx_ARCHS=x86_64) -elif [[ $(uname) == Linux ]]; then - _cmake_config+=(-DLLVM_USE_INTEL_JITEVENTS=ON) -# _cmake_config+=(-DLLVM_BINUTILS_INCDIR=${PREFIX}/lib/gcc/${cpu_arch}-${vendor}-linux-gnu/${compiler_ver}/plugin/include) + +if [[ "$CC_FOR_BUILD" != "" && "$CC_FOR_BUILD" != "$CC" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_C_COMPILER=$CC_FOR_BUILD;-DCMAKE_CXX_COMPILER=$CXX_FOR_BUILD;-DCMAKE_C_FLAGS=-O2;-DCMAKE_CXX_FLAGS=-O2;-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,${BUILD_PREFIX}/lib;-DCMAKE_MODULE_LINKER_FLAGS=;-DCMAKE_SHARED_LINKER_FLAGS=;-DCMAKE_STATIC_LINKER_FLAGS=;-DLLVM_INCLUDE_BENCHMARKS=OFF;" + CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_HOST_TRIPLE=$(echo $HOST | sed s/conda/unknown/g) -DLLVM_DEFAULT_TARGET_TRIPLE=$(echo $HOST | sed s/conda/unknown/g)" fi -# For when the going gets tough: -# _cmake_config+=(-Wdev) -# _cmake_config+=(--debug-output) -# _cmake_config+=(--trace-expand) -# CPU_COUNT=1 +# disable -fno-plt due to https://bugs.llvm.org/show_bug.cgi?id=51863 due to some GCC bug +if [[ "$target_platform" == "linux-ppc64le" ]]; then + CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" + CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" + CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" + CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" +fi -mkdir build -cd build +if [[ $target_platform == osx-arm64 ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_ENABLE_WERROR=FALSE" +fi -cmake -G'Unix Makefiles' \ - "${_cmake_config[@]}" \ +cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_LIBRARY_PATH="${PREFIX}" \ + -DLLVM_ENABLE_LIBEDIT=OFF \ + -DLLVM_ENABLE_LIBXML2=OFF \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_TERMINFO=OFF \ + -DLLVM_INCLUDE_BENCHMARKS=OFF \ + -DLLVM_INCLUDE_DOCS=OFF \ + -DLLVM_INCLUDE_EXAMPLES=OFF \ + -DLLVM_INCLUDE_GO_TESTS=OFF \ + -DLLVM_INCLUDE_TESTS=ON \ + -DLLVM_INCLUDE_UTILS=ON \ + -DLLVM_INSTALL_UTILS=ON \ + -DLLVM_UTILS_INSTALL_DIR=libexec/llvm \ + -DLLVM_BUILD_LLVM_DYLIB=OFF \ + -DLLVM_LINK_LLVM_DYLIB=OFF \ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_ENABLE_Z3_SOLVER=OFF \ + -DLLVM_OPTIMIZED_TABLEGEN=ON \ + -DCMAKE_POLICY_DEFAULT_CMP0111=NEW \ + -DCOMPILER_RT_BUILD_BUILTINS=ON \ + 
-DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \ + -DCOMPILER_RT_BUILD_LIBFUZZER=OFF \ + -DCOMPILER_RT_BUILD_CRT=OFF \ + -DCOMPILER_RT_BUILD_MEMPROF=OFF \ + -DCOMPILER_RT_BUILD_PROFILE=OFF \ + -DCOMPILER_RT_BUILD_SANITIZERS=OFF \ + -DCOMPILER_RT_BUILD_XRAY=OFF \ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ + -DCOMPILER_RT_BUILD_ORC=OFF \ + -DCOMPILER_RT_INCLUDE_TESTS=OFF \ + ${CMAKE_ARGS} \ + -GNinja \ ../llvm -ARCH=`uname -m` -if [ $ARCH == 'armv7l' ]; then # RPi need thread count throttling - make -j2 VERBOSE=1 + +ninja -j${CPU_COUNT} + +ninja install + +if [[ "${target_platform}" == "linux-64" || "${target_platform}" == "osx-64" ]]; then + export TEST_CPU_FLAG="-mcpu=haswell" else - make -j${CPU_COUNT} VERBOSE=1 + export TEST_CPU_FLAG="" fi -make check-llvm-unit || exit $? +if [[ "$CONDA_BUILD_CROSS_COMPILATION" != "1" ]]; then -# From: https://github.com/conda-forge/llvmdev-feedstock/pull/53 -make install || exit $? + echo "Testing on ${target_platform}" + # bin/opt -S -vector-library=SVML $TEST_CPU_FLAG -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? -# SVML tests on x86_64 arch only -if [[ $ARCH == 'x86_64' ]]; then - bin/opt -S -vector-library=SVML -mcpu=haswell -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? + if [[ "$target_platform" == linux* ]]; then + ln -s $(which $CC) $BUILD_PREFIX/bin/gcc + + # These tests tests permission-based behaviour and probably fail because of some + # filesystem-related reason. They are sporadic failures and don't seem serious so they're excluded. + # Note that indents would introduce spaces into the environment variable + export LIT_FILTER_OUT='tools/llvm-ar/error-opening-permission.test|'\ +'tools/llvm-dwarfdump/X86/output.s|'\ +'tools/llvm-ifs/fail-file-write.test|'\ +'tools/llvm-ranlib/error-opening-permission.test' + fi + + if [[ "$target_platform" == osx-* ]]; then + # This failure seems like something to do with the output format of ls -lu + # and looks harmless + export LIT_FILTER_OUT='tools/llvm-objcopy/ELF/strip-preserve-atime.test|'\ +'ExecutionEngine/Interpreter/intrinsics.ll' + fi + + cd ../llvm/test + ${PYTHON} ../../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86 fi + diff --git a/conda-recipes/llvmdev/conda_build_config.yaml b/conda-recipes/llvmdev/conda_build_config.yaml index 81b7d08c3..1ae35296b 100644 --- a/conda-recipes/llvmdev/conda_build_config.yaml +++ b/conda-recipes/llvmdev/conda_build_config.yaml @@ -10,3 +10,11 @@ cxx_compiler_version: # [linux] fortran_compiler_version: # [linux] - 7 # [linux and (x86_64 or ppc64le)] - 9 # [linux and aarch64] + +c_compiler: # [win] + - vs2019 # [win] +cxx_compiler: # [win] + - vs2019 # [win] + +MACOSX_SDK_VERSION: # [osx and x86_64] + - 10.12 # [osx and x86_64] diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml index 7676d234e..32d15a790 100644 --- a/conda-recipes/llvmdev/meta.yaml +++ b/conda-recipes/llvmdev/meta.yaml @@ -1,30 +1,27 @@ -{% set shortversion = "14.0" %} -{% set version = "14.0.6" %} -{% set sha256_llvm = "050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a" %} -{% set sha256_lld = "0c28ce0496934d37d20fec96591032dd66af8d10178a45762e0e75e85cf95ad3" %} -{% set sha256_libunwind = "3bbe9c23c73259fe39c045dc87d0b283236ba6e00750a226b2c2aeac4a51d86b" %} -{% set build_number = "3" %} +{% set shortversion = "15.0" %} +{% set version = "15.0.7" %} +{% set sha256_llvm = "8b5fcb24b4128cf04df1b0b9410ce8b1a729cb3c544e6da885d234280dedeac6" %} +{% set build_number = "1" %} 
package: name: llvmdev version: {{ version }} source: - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/llvm-{{ version }}.src.tar.xz - fn: llvm-{{ version }}.src.tar.xz + - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version.replace(".rc", "-rc") }}/llvm-project-{{ version.replace(".rc", "rc") }}.src.tar.xz sha256: {{ sha256_llvm }} patches: - - ../llvm14-clear-gotoffsetmap.patch - - ../llvm14-remove-use-of-clonefile.patch - - ../llvm14-svml.patch - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/lld-{{ version }}.src.tar.xz - fn: lld-{{ version }}.src.tar.xz - sha256: {{ sha256_lld }} + - ../llvm15-clear-gotoffsetmap.patch + - ../llvm15-remove-use-of-clonefile.patch + - ../llvm15-svml.patch + - ../compiler-rt-cfi-startproc-war.patch + - ../compiler-rt-macos-build.patch - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version }}/libunwind-{{ version }}.src.tar.xz - fn: libunwind-{{ version }}.src.tar.xz - sha256: {{ sha256_libunwind }} - folder: unwind + # Patches from conda-forge needed for windows to build + # backport of zlib patches, can be dropped for vs15.0.3, see + # https://reviews.llvm.org/D135457 & https://reviews.llvm.org/D136065 + - patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch + - patches/no-windows-symlinks.patch build: number: {{ build_number }} @@ -36,25 +33,22 @@ build: requirements: build: - # We cannot do this on macOS or windows - # OSX already has llvm so has to be handled - # at build.sh time - # Windows needs to build using vs2015_runtime - # irrespective of python version - - {{ compiler('c') }} # [unix and not (armv6l or armv7l)] - - {{ compiler('cxx') }} # [unix and not (armv6l or armv7l)] + - {{ compiler('cxx') }} - cmake - - make # [unix and not (armv6l or armv7l or aarch64)] - # Needed to unpack the source tarball - - m2w64-xz # [win] - # Needed to build LLVM + - ninja - python >=3 + - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] + - patch # [not win] + - m2-patch # [win] + - git # [(linux and x86_64)] + host: - # needed for llc at runtime - - zlib # [not win] - - xar # [osx and x86_64] - # llvm-lit testing needs *a* python - - python # [not (armv6l or armv7l or aarch64 or win)] + - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] + - libffi # [unix] + # libxml2 supports a windows-only feature, see https://github.com/llvm/llvm-project/blob/llvmorg-17.0.6/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h + - libxml2 # [win] + - zlib + - zstd test: files: diff --git a/conda-recipes/llvmdev_llvm15/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch b/conda-recipes/llvmdev/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch similarity index 100% rename from conda-recipes/llvmdev_llvm15/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch rename to conda-recipes/llvmdev/patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch diff --git a/conda-recipes/llvmdev_llvm15/patches/no-windows-symlinks.patch b/conda-recipes/llvmdev/patches/no-windows-symlinks.patch similarity index 100% rename from conda-recipes/llvmdev_llvm15/patches/no-windows-symlinks.patch rename to conda-recipes/llvmdev/patches/no-windows-symlinks.patch diff --git a/conda-recipes/llvmdev_llvm15/bld.bat b/conda-recipes/llvmdev_llvm15/bld.bat deleted file mode 100644 index ec84d9897..000000000 --- a/conda-recipes/llvmdev_llvm15/bld.bat +++ /dev/null @@ 
-1,59 +0,0 @@ -REM base on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/bld.bat -echo on - -mkdir build -cd build - -REM remove GL flag for now -set "CXXFLAGS=-MD" -set "CC=cl.exe" -set "CXX=cl.exe" - -cmake -G "Ninja" ^ - -DCMAKE_BUILD_TYPE="Release" ^ - -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX% ^ - -DCMAKE_INSTALL_PREFIX:PATH=%LIBRARY_PREFIX% ^ - -DLLVM_USE_INTEL_JITEVENTS=ON ^ - -DLLVM_ENABLE_LIBXML2=FORCE_ON ^ - -DLLVM_ENABLE_RTTI=ON ^ - -DLLVM_ENABLE_ZLIB=FORCE_ON ^ - -DLLVM_ENABLE_ZSTD=FORCE_ON ^ - -DLLVM_INCLUDE_BENCHMARKS=OFF ^ - -DLLVM_INCLUDE_DOCS=OFF ^ - -DLLVM_INCLUDE_EXAMPLES=OFF ^ - -DLLVM_INCLUDE_TESTS=ON ^ - -DLLVM_INCLUDE_UTILS=ON ^ - -DLLVM_INSTALL_UTILS=ON ^ - -DLLVM_UTILS_INSTALL_DIR=libexec\llvm ^ - -DLLVM_BUILD_LLVM_C_DYLIB=no ^ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly ^ - -DCMAKE_POLICY_DEFAULT_CMP0111=NEW ^ - -DLLVM_ENABLE_PROJECTS:STRING=lld;compiler-rt ^ - -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_ENABLE_DIA_SDK=OFF ^ - -DCOMPILER_RT_BUILD_BUILTINS=ON ^ - -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF ^ - -DCOMPILER_RT_BUILD_LIBFUZZER=OFF ^ - -DCOMPILER_RT_BUILD_CRT=OFF ^ - -DCOMPILER_RT_BUILD_MEMPROF=OFF ^ - -DCOMPILER_RT_BUILD_PROFILE=OFF ^ - -DCOMPILER_RT_BUILD_SANITIZERS=OFF ^ - -DCOMPILER_RT_BUILD_XRAY=OFF ^ - -DCOMPILER_RT_BUILD_GWP_ASAN=OFF ^ - -DCOMPILER_RT_BUILD_ORC=OFF ^ - -DCOMPILER_RT_INCLUDE_TESTS=OFF ^ - %SRC_DIR%/llvm -if %ERRORLEVEL% neq 0 exit 1 - -cmake --build . -if %ERRORLEVEL% neq 0 exit 1 - -cmake --build . --target install - -if %ERRORLEVEL% neq 0 exit 1 - -REM bin\opt -S -vector-library=SVML -mcpu=haswell -O3 %RECIPE_DIR%\numba-3016.ll | bin\FileCheck %RECIPE_DIR%\numba-3016.ll -REM if %ERRORLEVEL% neq 0 exit 1 - -cd ..\llvm\test -python ..\..\build\bin\llvm-lit.py -vv Transforms ExecutionEngine Analysis CodeGen/X86 diff --git a/conda-recipes/llvmdev_llvm15/build.sh b/conda-recipes/llvmdev_llvm15/build.sh deleted file mode 100644 index e1adebd59..000000000 --- a/conda-recipes/llvmdev_llvm15/build.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash - -# based on https://github.com/AnacondaRecipes/llvmdev-feedstock/blob/master/recipe/build.sh - -set -x - -# Make osx work like linux. 
-sed -i.bak "s/NOT APPLE AND ARG_SONAME/ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake -sed -i.bak "s/NOT APPLE AND NOT ARG_SONAME/NOT ARG_SONAME/g" llvm/cmake/modules/AddLLVM.cmake - -mkdir build -cd build - -export CPU_COUNT=4 - -CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" - -if [[ "$target_platform" == "linux-64" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_USE_INTEL_JITEVENTS=ON" -fi - -if [[ "$CC_FOR_BUILD" != "" && "$CC_FOR_BUILD" != "$CC" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_C_COMPILER=$CC_FOR_BUILD;-DCMAKE_CXX_COMPILER=$CXX_FOR_BUILD;-DCMAKE_C_FLAGS=-O2;-DCMAKE_CXX_FLAGS=-O2;-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,${BUILD_PREFIX}/lib;-DCMAKE_MODULE_LINKER_FLAGS=;-DCMAKE_SHARED_LINKER_FLAGS=;-DCMAKE_STATIC_LINKER_FLAGS=;-DLLVM_INCLUDE_BENCHMARKS=OFF;" - CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_HOST_TRIPLE=$(echo $HOST | sed s/conda/unknown/g) -DLLVM_DEFAULT_TARGET_TRIPLE=$(echo $HOST | sed s/conda/unknown/g)" -fi - -# disable -fno-plt due to https://bugs.llvm.org/show_bug.cgi?id=51863 due to some GCC bug -if [[ "$target_platform" == "linux-ppc64le" ]]; then - CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" - CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" - CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" - CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" -fi - -if [[ $target_platform == osx-arm64 ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_ENABLE_WERROR=FALSE" -fi - -cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_LIBRARY_PATH="${PREFIX}" \ - -DLLVM_ENABLE_LIBEDIT=OFF \ - -DLLVM_ENABLE_LIBXML2=OFF \ - -DLLVM_ENABLE_RTTI=ON \ - -DLLVM_ENABLE_TERMINFO=OFF \ - -DLLVM_INCLUDE_BENCHMARKS=OFF \ - -DLLVM_INCLUDE_DOCS=OFF \ - -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DLLVM_INCLUDE_GO_TESTS=OFF \ - -DLLVM_INCLUDE_TESTS=ON \ - -DLLVM_INCLUDE_UTILS=ON \ - -DLLVM_INSTALL_UTILS=ON \ - -DLLVM_UTILS_INSTALL_DIR=libexec/llvm \ - -DLLVM_BUILD_LLVM_DYLIB=OFF \ - -DLLVM_LINK_LLVM_DYLIB=OFF \ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \ - -DLLVM_ENABLE_FFI=ON \ - -DLLVM_ENABLE_Z3_SOLVER=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN=ON \ - -DCMAKE_POLICY_DEFAULT_CMP0111=NEW \ - -DCOMPILER_RT_BUILD_BUILTINS=ON \ - -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \ - -DCOMPILER_RT_BUILD_LIBFUZZER=OFF \ - -DCOMPILER_RT_BUILD_CRT=OFF \ - -DCOMPILER_RT_BUILD_MEMPROF=OFF \ - -DCOMPILER_RT_BUILD_PROFILE=OFF \ - -DCOMPILER_RT_BUILD_SANITIZERS=OFF \ - -DCOMPILER_RT_BUILD_XRAY=OFF \ - -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ - -DCOMPILER_RT_BUILD_ORC=OFF \ - -DCOMPILER_RT_INCLUDE_TESTS=OFF \ - ${CMAKE_ARGS} \ - -GNinja \ - ../llvm - - -ninja -j${CPU_COUNT} - -ninja install - -if [[ "${target_platform}" == "linux-64" || "${target_platform}" == "osx-64" ]]; then - export TEST_CPU_FLAG="-mcpu=haswell" -else - export TEST_CPU_FLAG="" -fi - -if [[ "$CONDA_BUILD_CROSS_COMPILATION" != "1" ]]; then - - echo "Testing on ${target_platform}" - # bin/opt -S -vector-library=SVML $TEST_CPU_FLAG -O3 $RECIPE_DIR/numba-3016.ll | bin/FileCheck $RECIPE_DIR/numba-3016.ll || exit $? - - if [[ "$target_platform" == linux* ]]; then - ln -s $(which $CC) $BUILD_PREFIX/bin/gcc - - # These tests tests permission-based behaviour and probably fail because of some - # filesystem-related reason. They are sporadic failures and don't seem serious so they're excluded. 
- # Note that indents would introduce spaces into the environment variable - export LIT_FILTER_OUT='tools/llvm-ar/error-opening-permission.test|'\ -'tools/llvm-dwarfdump/X86/output.s|'\ -'tools/llvm-ifs/fail-file-write.test|'\ -'tools/llvm-ranlib/error-opening-permission.test' - fi - - if [[ "$target_platform" == osx-* ]]; then - # This failure seems like something to do with the output format of ls -lu - # and looks harmless - export LIT_FILTER_OUT='tools/llvm-objcopy/ELF/strip-preserve-atime.test|'\ -'ExecutionEngine/Interpreter/intrinsics.ll' - fi - - cd ../llvm/test - ${PYTHON} ../../build/bin/llvm-lit -vv Transforms ExecutionEngine Analysis CodeGen/X86 -fi - diff --git a/conda-recipes/llvmdev_llvm15/conda_build_config.yaml b/conda-recipes/llvmdev_llvm15/conda_build_config.yaml deleted file mode 100644 index 1ae35296b..000000000 --- a/conda-recipes/llvmdev_llvm15/conda_build_config.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Numba/llvmlite stack needs an older compiler for backwards compatability. -c_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -cxx_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -fortran_compiler_version: # [linux] - - 7 # [linux and (x86_64 or ppc64le)] - - 9 # [linux and aarch64] - -c_compiler: # [win] - - vs2019 # [win] -cxx_compiler: # [win] - - vs2019 # [win] - -MACOSX_SDK_VERSION: # [osx and x86_64] - - 10.12 # [osx and x86_64] diff --git a/conda-recipes/llvmdev_llvm15/meta.yaml b/conda-recipes/llvmdev_llvm15/meta.yaml deleted file mode 100644 index 32d15a790..000000000 --- a/conda-recipes/llvmdev_llvm15/meta.yaml +++ /dev/null @@ -1,77 +0,0 @@ -{% set shortversion = "15.0" %} -{% set version = "15.0.7" %} -{% set sha256_llvm = "8b5fcb24b4128cf04df1b0b9410ce8b1a729cb3c544e6da885d234280dedeac6" %} -{% set build_number = "1" %} - -package: - name: llvmdev - version: {{ version }} - -source: - - url: https://github.com/llvm/llvm-project/releases/download/llvmorg-{{ version.replace(".rc", "-rc") }}/llvm-project-{{ version.replace(".rc", "rc") }}.src.tar.xz - sha256: {{ sha256_llvm }} - patches: - - ../llvm15-clear-gotoffsetmap.patch - - ../llvm15-remove-use-of-clonefile.patch - - ../llvm15-svml.patch - - ../compiler-rt-cfi-startproc-war.patch - - ../compiler-rt-macos-build.patch - - # Patches from conda-forge needed for windows to build - # backport of zlib patches, can be dropped for vs15.0.3, see - # https://reviews.llvm.org/D135457 & https://reviews.llvm.org/D136065 - - patches/0002-CMake-Fix-Findzstd-module-for-shared-DLL-on-Windows.patch - - patches/no-windows-symlinks.patch - -build: - number: {{ build_number }} - script_env: - - PY_VCRUNTIME_REDIST - ignore_run_exports: - # Is static-linked - - xar - -requirements: - build: - - {{ compiler('cxx') }} - - cmake - - ninja - - python >=3 - - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] - - patch # [not win] - - m2-patch # [win] - - git # [(linux and x86_64)] - - host: - - libcxx # it is not defined{{ cxx_compiler_version }} # [osx] - - libffi # [unix] - # libxml2 supports a windows-only feature, see https://github.com/llvm/llvm-project/blob/llvmorg-17.0.6/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h - - libxml2 # [win] - - zlib - - zstd - -test: - files: - - numba-3016.ll - commands: - - $PREFIX/bin/llvm-config --libs # [not win] - - $PREFIX/bin/llc -version # [not win] - - - if not exist %LIBRARY_INC%\\llvm\\Pass.h exit 1 # [win] - - if not exist %LIBRARY_LIB%\\LLVMSupport.lib exit 1 # 
[win] - - - test -f $PREFIX/include/llvm/Pass.h # [unix] - - test -f $PREFIX/lib/libLLVMSupport.a # [unix] - - - test -f $PREFIX/lib/libLLVMCore.a # [not win] - - # LLD tests - - ld.lld --version # [unix] - - lld-link /? # [win] - -about: - home: http://llvm.org/ - dev_url: https://github.com/llvm-mirror/llvm - license: NCSA - license_file: llvm/LICENSE.TXT - summary: Development headers and libraries for LLVM diff --git a/conda-recipes/llvmdev_llvm15/numba-3016.ll b/conda-recipes/llvmdev_llvm15/numba-3016.ll deleted file mode 100644 index 1a9b3ecf8..000000000 --- a/conda-recipes/llvmdev_llvm15/numba-3016.ll +++ /dev/null @@ -1,80 +0,0 @@ -; Regression test for llvmdev-feedstock#52 and numba#3016 - -; Generated from C code: int a[1<<10],b[1<<10]; void foo() { int i=0; for(i=0; i<1<<10; i++) { b[i]=sin(a[i]); }} -; compiled: -fvectorize -fveclib=SVML -O -S -mavx -mllvm -disable-llvm-optzns -emit-llvm - -; RUN: opt -vector-library=SVML -mcpu=haswell -O3 -S < %s | FileCheck %s -; CHECK: call {{.*}}__svml_sin4_ha( -; CHECK-NOT: call {{.*}}__svml_sin4( -; CHECK-NOT: call {{.*}}__svml_sin8 - -source_filename = "svml-3016.c" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - -@a = common dso_local global [1024 x i32] zeroinitializer, align 16 -@b = common dso_local global [1024 x i32] zeroinitializer, align 16 - -; Function Attrs: nounwind uwtable -define dso_local void @foo() #0 { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3 - store i32 0, i32* %1, align 4, !tbaa !2 - store i32 0, i32* %1, align 4, !tbaa !2 - br label %3 - -;