diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index 304718100143e..9b89189d0ea8a 100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -19,6 +19,12 @@ template class marray; template struct is_device_copyable; +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#ifndef MARRAY_USE_WIDE_ALIGNMENT +#define MARRAY_USE_WIDE_ALIGNMENT 1 +#endif // MARRAY_USE_WIDE_ALIGNMENT +#endif + namespace detail { // Helper trait for counting the aggregate number of arguments in a type list, @@ -35,6 +41,27 @@ template struct GetMArrayArgsSize { static constexpr std::size_t value = 1 + GetMArrayArgsSize::value; }; +// Computes the storage alignment for marray. NumElements == 3 is padded to 4 +// elements and aligned as 4 elements (matching sycl::vec); all other sizes +// keep tight packing by using the largest power-of-two alignment that divides +// the total byte size. +// The alignment guarantee is limited to 64 bytes because some host compilers +// (e.g. on Microsoft Windows) limit the maximum alignment of function +// parameters to this value. +constexpr std::size_t marrayAlignment(std::size_t NumElements, + std::size_t ElemSize) { + constexpr std::size_t MaxAlign = 64; + if (NumElements == 3) { + std::size_t Bytes = 4 * ElemSize; + return Bytes <= MaxAlign ? Bytes : MaxAlign; + } + std::size_t Bytes = NumElements * ElemSize; + std::size_t Align = 1; + while ((Align << 1) <= MaxAlign && (Bytes % (Align << 1)) == 0) + Align <<= 1; + return Align; +} + } // namespace detail /// Provides a cross-platform math array class template that works on @@ -58,7 +85,21 @@ template class marray { using const_iterator = const Type *; private: + // For NumElements == 3 this aligns to 4 elements (capped at 64), which adds + // trailing padding so sizeof == 4 * sizeof(DataT). For all other sizes the + // alignment divides the total size, so no padding is added. + static constexpr std::size_t MArrayAlignment = + detail::marrayAlignment(NumElements, sizeof(DataT)); + +// Changing marray's alignment may change the ABI, so we need to guard it +// with a macro. User can either use -fpreview-breaking-changes or define +// MARRAY_USE_WIDE_ALIGNMENT macro and assume responsibility of ABI breakages +// if marray is passed across ABI boundary. +#ifdef MARRAY_USE_WIDE_ALIGNMENT + alignas(MArrayAlignment) value_type MData[NumElements]; +#else value_type MData[NumElements]; +#endif // Trait for checking if an argument type is either convertible to the data // type or an array of types convertible to the data type. diff --git a/sycl/test-e2e/Performance/vec_vs_marray.cpp b/sycl/test-e2e/Performance/vec_vs_marray.cpp new file mode 100644 index 0000000000000..d8989595e3122 --- /dev/null +++ b/sycl/test-e2e/Performance/vec_vs_marray.cpp @@ -0,0 +1,266 @@ +//==------- vec_vs_marray.cpp --- sycl::vec vs sycl::marray performance ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Performance comparison between sycl::vec and sycl::marray. +// +// Motivation +// sycl::marray (SYCL 2020 sec. 4.14.3) is specified as a plain array-like +// type with no over-alignment requirement, whereas sycl::vec (SYCL 2020 sec. +// 4.14.2) is over-aligned to the size of the whole vector. When user code is +// migrated from sycl::vec to sycl::marray the relaxed alignment can pessimize +// vectorized memory accesses, which is especially visible for 16-bit element +// types such as sycl::half. This test measures both containers side by side +// so that such differences become visible. +// +// What is compared +// The timed kernels use only the subset of the API that is common to both +// containers, so the exact same kernel body is instantiated for each and any +// timing difference is attributable to the container type alone: +// * construction from a scalar vec(const T&) / marray(const T&) +// * copy/load C v = in[i]; +// * store/assignment out[i] = v; +// * operator[] / size() element access (smoke + init) +// * arithmetic operators + - * v * a + a +// * compound assignment v -= a +// +// sycl::vec only supports 1, 2, 3, 4, 8 and 16 components, while sycl::marray +// can have an arbitrary number of components. Sizes supported by both are +// compared head to head; sizes that only marray supports are reported on +// their own ("marray-only"). +// +// The test is informational for performance: it prints the per-(type, size) +// kernel times for both containers and highlights rows where the marray/vec +// ratio exceeds threshold. It never fails on a performance difference. +// +// +// RUN: %{build} -o %t_non_prev.out +// RUN: %{build} -fpreview-breaking-changes -o %t_prev.out +// RUN: %{run} %t_non_prev.out +// RUN: %{run} %t_prev.out +// +// UNSUPPORTED: linux || windows +// UNSUPPORTED-INTENDED: This test is intended to be run manually to compare +// the performance of vec and marray. It doesn't check or assert anything. + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +// -- problem size / measurement parameters --------------------------------- +constexpr size_t NumElems = 1 << 18; // work-items per kernel launch +constexpr int ComputeIters = 64; // arithmetic iterations (compute kernel) +constexpr int Warmup = 2; // un-timed launches +constexpr int Repeats = 10; // timed launches; median is kept + +// marray/vec ratio above which a row is highlighted (purely informational). +constexpr double threshold = 1.5; + +// -- container introspection ------------------------------------------------ +// Extracts the element type from a vec or marray container type. +template struct ContainerElement; +template +struct ContainerElement> { + using type = ElementT; +}; +template +struct ContainerElement> { + using type = ElementT; +}; +template +using ContainerElementT = typename ContainerElement::type; + +enum class BenchKind { Stream, Compute }; + +// One timed launch. Returns the device-side kernel duration in nanoseconds as +// reported by event profiling. +template +double launchOnce(queue &q, const ContainerT *in, ContainerT *out, + [[maybe_unused]] ContainerElementT one) { + event e = q.parallel_for(range<1>(NumElems), [=](id<1> idx) { + const size_t i = idx[0]; + const ContainerT a = in[i]; + if constexpr (Mode == BenchKind::Stream) { + // Load + vector add + store: sensitive to element alignment. + out[i] = a + ContainerT(one); + } else { + // Arithmetic heavy: sensitive to per-element compute throughput. + ContainerT acc = a; + for (int k = 0; k < ComputeIters; ++k) { + acc = acc * a + a; // multiply + add + acc -= a; // compound subtract + } + out[i] = acc; + } + }); + e.wait_and_throw(); + const auto s = e.get_profiling_info(); + const auto f = e.get_profiling_info(); + return static_cast(f - s); +} + +// Warm-up + timed launches for one container type and bench kind. Returns the +// median kernel time in nanoseconds, or -1 on allocation failure (so over-sized +// configurations are skipped instead of aborting). +template double bench(queue &q) { + using ElementT = ContainerElementT; + const ElementT one = static_cast(static_cast(1)); + + ContainerT *in = nullptr; + ContainerT *out = nullptr; + try { + in = malloc_device(NumElems, q); + out = malloc_device(NumElems, q); + if (!in || !out) + throw std::runtime_error("allocation returned null"); + } catch (const std::exception &) { + if (in) + sycl::free(in, q); + if (out) + sycl::free(out, q); + return -1.0; + } + + // Initialise input with index-dependent values to defeat constant folding. + q.parallel_for(range<1>(NumElems), [=](id<1> idx) { + const size_t i = idx[0]; + in[i] = ContainerT(static_cast(static_cast((i % 7) + 1))); + }).wait_and_throw(); + + for (int w = 0; w < Warmup; ++w) + (void)launchOnce(q, in, out, one); + + std::array samples; + for (int r = 0; r < Repeats; ++r) + samples[r] = launchOnce(q, in, out, one); + + sycl::free(in, q); + sycl::free(out, q); + + // Median of the timed launches: robust to outliers while still representative + // of typical (not just best-case) performance. + std::sort(samples.begin(), samples.end()); + if constexpr (Repeats % 2 == 1) + return samples[Repeats / 2]; + else + return 0.5 * (samples[Repeats / 2 - 1] + samples[Repeats / 2]); +} + +// Compare vec against marray +// for one (type, size). Prints the timings and highlights the row when the +// marray/vec ratio exceeds threshold. Never affects the test's pass/fail +// status. +template +void compareVecAndMarray(queue &q, const char *typeName) { + using VecT = vec; + using MarrayT = marray(NumComponents)>; + + const double vStream = bench(q); + const double mStream = bench(q); + const double vCompute = bench(q); + const double mCompute = bench(q); + + if (vStream < 0 || mStream < 0 || vCompute < 0 || mCompute < 0) { + std::cout << " " << std::left << std::setw(8) << typeName << " x" + << NumComponents << " \n"; + return; + } + + const double eps = 1.0; + const double rStream = mStream / std::max(vStream, eps); + const double rCompute = mCompute / std::max(vCompute, eps); + const bool highlight = rStream > threshold || rCompute > threshold; + + std::cout << " " << std::left << std::setw(8) << typeName << "x" + << std::setw(3) << NumComponents << std::right << std::fixed + << std::setprecision(2) << " stream(vec/marr ns):" << std::setw(12) + << vStream << " /" << std::setw(12) << mStream << " x" + << std::setw(6) << rStream << " compute:" << std::setw(12) + << vCompute << " /" << std::setw(12) << mCompute << " x" + << std::setw(6) << rCompute + << (highlight ? " <== LARGE DIFFERENCE" : "") << "\n"; +} + +// Benchmark marray for a size that vec cannot +// represent. Only marray timings are printed; there is nothing to compare +// against. +template +void benchMarrayOnly(queue &q, const char *typeName) { + using MarrayT = marray; + + const double mStream = bench(q); + const double mCompute = bench(q); + + if (mStream < 0 || mCompute < 0) { + std::cout << " " << std::left << std::setw(8) << typeName << " x" + << NumComponents + << " marray-only \n"; + return; + } + + std::cout << " " << std::left << std::setw(8) << typeName << "x" + << std::setw(3) << NumComponents << std::right << std::fixed + << std::setprecision(2) + << " marray-only stream(ns):" << std::setw(12) << mStream + << " compute(ns):" << std::setw(12) << mCompute << "\n"; +} + +template +void benchAllSizes(queue &q, const char *typeName) { + // Sizes supported by both vec and marray: compared head to head. + compareVecAndMarray(q, typeName); + compareVecAndMarray(q, typeName); + compareVecAndMarray(q, typeName); + compareVecAndMarray(q, typeName); + compareVecAndMarray(q, typeName); + + // Sizes only marray supports (vec is limited to 1, 2, 3, 4, 8, 16): + // marray timings reported on their own. + benchMarrayOnly(q, typeName); + benchMarrayOnly(q, typeName); + benchMarrayOnly(q, typeName); + benchMarrayOnly(q, typeName); +} + +} // namespace + +int main() { + queue q{property::queue::enable_profiling{}}; + device dev = q.get_device(); + + std::cout << "Device: " << dev.get_info() << "\n"; + std::cout << "Highlight threshold: x" << threshold << "\n"; + std::cout << "Lower ns is better; marray/vec ratio > threshold is " + "highlighted.\n\n"; + + benchAllSizes(q, "int8"); + benchAllSizes(q, "int16"); + benchAllSizes(q, "int32"); + benchAllSizes(q, "int64"); + if (dev.has(aspect::fp16)) + benchAllSizes(q, "half"); + benchAllSizes(q, "float"); + if (dev.has(aspect::fp64)) + benchAllSizes(q, "double"); + benchAllSizes(q, "bf16"); + + return 0; +} \ No newline at end of file diff --git a/sycl/test/check_device_code/marray/marray_ops.cpp b/sycl/test/check_device_code/marray/marray_ops.cpp new file mode 100644 index 0000000000000..91ff2f61d9315 --- /dev/null +++ b/sycl/test/check_device_code/marray/marray_ops.cpp @@ -0,0 +1,1306 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 4 +// NOTE: ..., followed by some manual cleanup. + +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only -fpreview-breaking-changes %s -o - | FileCheck -check-prefix=CHECK-PREVIEW %s + +// Windows/linux have some slight differences in IR generation (function +// arguments passing and long/long long differences/mangling) that could +// complicate test updates while not improving test coverage. Limiting to linux +// should be fine. +// REQUIRES: linux +#include + +using namespace sycl; +using bf16 = sycl::ext::oneapi::bfloat16; + +/*************** Constructors / Conversion ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z15TestDefaultCtorv( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray") align 4 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 4 dereferenceable(16) [[AGG_RESULT]], i8 0, i64 16, i1 false) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z15TestDefaultCtorv( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], i8 0, i64 16, i1 false) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestDefaultCtor() { return marray(); } +// CHECK-LABEL: define dso_local spir_func void @_Z14TestScalarCtori( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.0") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5:![0-9]+]] +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I:%.*]] +// CHECK: arrayinit.body.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I:%.*]], [[ARRAYINIT_BODY_I]] ] +// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[ARRAYINIT_CUR_IDX_I]] +// CHECK-NEXT: store i32 0, ptr addrspace(4) [[ARRAYINIT_CUR_PTR_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I]], 4 +// CHECK-NEXT: [[ARRAYINIT_DONE_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I]], 12 +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16MARRAYIILM3EEC2ERKI_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i32 [[A]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIiLm3EEC2ERKi.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z14TestScalarCtori( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.0") align 16 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5:![0-9]+]] +// CHECK-PREVIEW-NEXT: br label [[ARRAYINIT_BODY_I:%.*]] +// CHECK-PREVIEW: arrayinit.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_IDX_I:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I:%.*]], [[ARRAYINIT_BODY_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[ARRAYINIT_CUR_IDX_I]] +// CHECK-PREVIEW-NEXT: store i32 0, ptr addrspace(4) [[ARRAYINIT_CUR_PTR_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_ADD_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I]], 4 +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_DONE_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I]], 12 +// CHECK-PREVIEW-NEXT: br i1 [[ARRAYINIT_DONE_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16MARRAYIILM3EEC2ERKI_EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store i32 [[A]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIiLm3EEC2ERKi.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestScalarCtor(int a) { return marray(a); } +// CHECK-LABEL: define dso_local spir_func void @_Z16TestVariadicCtorii( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.1") align 4 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-NEXT: store i32 [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z16TestVariadicCtorii( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.1") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-PREVIEW-NEXT: store i32 [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestVariadicCtor(int a, int b) { + return marray(a, b); +} +// Variadic ctor that splices an marray argument (GetMArrayArgsSize path). +// CHECK-LABEL: define dso_local spir_func void @_Z22TestVariadicSpliceCtorN4sycl3_V16marrayIiLm2EEEiii( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.4") align 4 captures(none) initializes((0, 20)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.1") align 4 captures(none) [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA5]], !noalias [[META12:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX2_I_I_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2_I_I_I_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META12]] +// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT3_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 8 +// CHECK-NEXT: store i32 [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT3_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT5_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 12 +// CHECK-NEXT: store i32 [[C]], ptr addrspace(4) [[ARRAYINIT_ELEMENT5_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT7_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 16 +// CHECK-NEXT: store i32 [[D]], ptr addrspace(4) [[ARRAYINIT_ELEMENT7_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z22TestVariadicSpliceCtorN4sycl3_V16marrayIiLm2EEEiii( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.4") align 4 captures(none) initializes((0, 20)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.1") align 8 captures(none) [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 8, !tbaa [[TBAA5]], !noalias [[META12:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX2_I_I_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4 +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2_I_I_I_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META12]] +// CHECK-PREVIEW-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-PREVIEW-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT3_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 8 +// CHECK-PREVIEW-NEXT: store i32 [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT3_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT5_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 12 +// CHECK-PREVIEW-NEXT: store i32 [[C]], ptr addrspace(4) [[ARRAYINIT_ELEMENT5_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT7_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 16 +// CHECK-PREVIEW-NEXT: store i32 [[D]], ptr addrspace(4) [[ARRAYINIT_ELEMENT7_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestVariadicSpliceCtor(marray a, int b, + int c, int d) { + return marray(a, b, c, d); +} +// CHECK-LABEL: define dso_local spir_func void @_Z12TestCopyCtorN4sycl3_V16marrayIfLm63EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.10") align 4 captures(none) initializes((0, 252)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.10") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 4 [[AGG_RESULT]], ptr align 4 [[A]], i64 252, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z12TestCopyCtorN4sycl3_V16marrayIfLm63EEE( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.10") align 4 captures(none) initializes((0, 252)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.10") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 4 [[AGG_RESULT]], ptr align 4 [[A]], i64 252, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestCopyCtor(marray a) { + return marray(a); +} +// CHECK-LABEL: define dso_local spir_func void @_Z12TestMoveCtorN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.11") align 2 captures(none) initializes((0, 6)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.11") align 2 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 2 [[AGG_RESULT]], ptr align 2 [[A]], i64 6, i1 false), !tbaa.struct [[TBAA_STRUCT23:![0-9]+]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z12TestMoveCtorN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm3EEE( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.11") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.11") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8 +// CHECK-PREVIEW-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8 +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestMoveCtor(marray a) { + return marray(std::move(a)); +} +// operator DataT(), available only when NumElements == 1. +// CHECK-LABEL: define dso_local spir_func void @_Z19TestConvertToScalarN4sycl3_V16marrayINS0_6detail9half_impl4halfELm1EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::detail::half_impl::half") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.12") align 2 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2, !tbaa [[TBAA29:![0-9]+]], !noalias [[META26]] +// CHECK-NEXT: store i16 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 2, !tbaa [[TBAA29]], !alias.scope [[META26]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z19TestConvertToScalarN4sycl3_V16marrayINS0_6detail9half_impl4halfELm1EEE( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::detail::half_impl::half") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.13") align 2 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2, !tbaa [[TBAA28:![0-9]+]], !noalias [[META25]] +// CHECK-PREVIEW-NEXT: store i16 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 2, !tbaa [[TBAA28]], !alias.scope [[META25]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL half TestConvertToScalar(marray a) { return a; } + +// Size-3 (padded) and float variants to surface the layout change. +// CHECK-LABEL: define dso_local spir_func void @_Z15TestScalarCtor3i( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.0") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I:%.*]] +// CHECK: arrayinit.body.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I:%.*]], [[ARRAYINIT_BODY_I]] ] +// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[ARRAYINIT_CUR_IDX_I]] +// CHECK-NEXT: store i32 0, ptr addrspace(4) [[ARRAYINIT_CUR_PTR_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I]], 4 +// CHECK-NEXT: [[ARRAYINIT_DONE_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I]], 12 +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16MARRAYIILM3EEC2ERKI_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i32 [[A]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP10]] +// CHECK: _ZN4sycl3_V16marrayIiLm3EEC2ERKi.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z15TestScalarCtor3i( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.0") align 16 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: store i32 [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: br label [[ARRAYINIT_BODY_I:%.*]] +// CHECK-PREVIEW: arrayinit.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_IDX_I:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I:%.*]], [[ARRAYINIT_BODY_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[ARRAYINIT_CUR_IDX_I]] +// CHECK-PREVIEW-NEXT: store i32 0, ptr addrspace(4) [[ARRAYINIT_CUR_PTR_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_ADD_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I]], 4 +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_DONE_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I]], 12 +// CHECK-PREVIEW-NEXT: br i1 [[ARRAYINIT_DONE_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16MARRAYIILM3EEC2ERKI_EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store i32 [[A]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP10]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIiLm3EEC2ERKi.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestScalarCtor3(int a) { + return marray(a); +} +// CHECK-LABEL: define dso_local spir_func void @_Z18TestVariadicCtorF3fff( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.13") align 4 captures(none) initializes((0, 12)) [[AGG_RESULT:%.*]], float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: store float [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA31:![0-9]+]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-NEXT: store float [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA31]] +// CHECK-NEXT: [[ARRAYINIT_ELEMENT3_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 8 +// CHECK-NEXT: store float [[C]], ptr addrspace(4) [[ARRAYINIT_ELEMENT3_I_I]], align 4, !tbaa [[TBAA31]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z18TestVariadicCtorF3fff( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.14") align 16 captures(none) initializes((0, 12)) [[AGG_RESULT:%.*]], float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: store float [[A]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA30:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 4 +// CHECK-PREVIEW-NEXT: store float [[B]], ptr addrspace(4) [[ARRAYINIT_ELEMENT_I_I]], align 4, !tbaa [[TBAA30]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_ELEMENT3_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 8 +// CHECK-PREVIEW-NEXT: store float [[C]], ptr addrspace(4) [[ARRAYINIT_ELEMENT3_I_I]], align 4, !tbaa [[TBAA30]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL marray TestVariadicCtorF3(float a, float b, float c) { + return marray(a, b, c); +} + +/*************** subscript / assignment ******************/ + +// CHECK-LABEL: define dso_local spir_func noundef double @_Z13TestSubscriptN4sycl3_V16marrayIdLm8EEEm( +// CHECK-SAME: ptr noundef readonly byval(%"class.sycl::_V1::marray.17") align 8 captures(none) [[A:%.*]], i64 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I]], align 8, !tbaa [[TBAA35:![0-9]+]] +// CHECK-NEXT: ret double [[TMP0]] +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func noundef double @_Z13TestSubscriptN4sycl3_V16marrayIdLm8EEEm( +// CHECK-PREVIEW-SAME: ptr noundef readonly byval(%"class.sycl::_V1::marray.19") align 64 captures(none) [[A:%.*]], i64 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I]], align 8, !tbaa [[TBAA34:![0-9]+]] +// CHECK-PREVIEW-NEXT: ret double [[TMP0]] +// +SYCL_EXTERNAL double TestSubscript(marray a, size_t i) { + return a[i]; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19TestSubscriptAssignRN4sycl3_V16marrayIsLm16EEEms( +// CHECK-SAME: ptr addrspace(4) noundef writeonly align 2 captures(none) dereferenceable(32) [[A:%.*]], i64 noundef [[I:%.*]], i16 noundef signext [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A]], i64 [[I]] +// CHECK-NEXT: store i16 [[V]], ptr addrspace(4) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA37:![0-9]+]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z19TestSubscriptAssignRN4sycl3_V16marrayIsLm16EEEms( +// CHECK-PREVIEW-SAME: ptr addrspace(4) noundef writeonly align 32 captures(none) dereferenceable(32) [[A:%.*]], i64 noundef [[I:%.*]], i16 noundef signext [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A]], i64 [[I]] +// CHECK-PREVIEW-NEXT: store i16 [[V]], ptr addrspace(4) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA36:![0-9]+]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL void TestSubscriptAssign(marray &a, size_t i, + std::int16_t v) { + a[i] = v; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z10TestAssignRN4sycl3_V16marrayIfLm4EEES2_( +// CHECK-SAME: ptr addrspace(4) noundef writeonly align 4 captures(none) dereferenceable(16) initializes((0, 16)) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.19") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 4 [[A]], ptr align 4 [[B]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT39:![0-9]+]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z10TestAssignRN4sycl3_V16marrayIfLm4EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) noundef writeonly align 16 captures(none) dereferenceable(16) initializes((0, 16)) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR4]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 16 [[A]], ptr align 16 [[B]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT38:![0-9]+]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL void TestAssign(marray &a, marray b) { + a = b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z16TestAssignScalarRN4sycl3_V16marrayIaLm3EEEa( +// CHECK-SAME: ptr addrspace(4) noundef writeonly align 1 captures(none) dereferenceable(3) [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16MARRAYIALM3EEASERKA_EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[A]], i64 [[I_0_I]] +// CHECK-NEXT: store i8 [[B]], ptr addrspace(4) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA22:![0-9]+]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIaLm3EEaSERKa.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z16TestAssignScalarRN4sycl3_V16marrayIaLm3EEEa( +// CHECK-PREVIEW-SAME: ptr addrspace(4) noundef writeonly align 4 captures(none) dereferenceable(3) [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16MARRAYIALM3EEASERKA_EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i8 [[B]], ptr addrspace(4) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA22:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIaLm3EEaSERKa.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL void TestAssignScalar(marray &a, std::int8_t b) { + a = b; +} + +/*************** Binary Arithmetic Ops ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V16marrayIlLm4EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.21") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYILLM4EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA44:![0-9]+]], !noalias [[META41]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA44]], !noalias [[META41]] +// CHECK-NEXT: [[ADD_I:%.*]] = add nsw i64 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store i64 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA44]], !alias.scope [[META41]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK: _ZN4sycl3_V1plERKNS0_6marrayIlLm4EEES4_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V16marrayIlLm4EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.24") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 32 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYILLM4EEES4__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA43:![0-9]+]], !noalias [[META40]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA43]], !noalias [[META40]] +// CHECK-PREVIEW-NEXT: [[ADD_I:%.*]] = add nsw i64 [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i64 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA43]], !alias.scope [[META40]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1plERKNS0_6marrayIlLm4EEES4_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(marray a, + marray b) { + return a + b; +} + +// marray OP scalar and scalar OP marray overloads. +// CHECK-LABEL: define dso_local spir_func void @_Z9TestAddSRN4sycl3_V16marrayIsLm8EEEs( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.22") align 2 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.22") align 2 captures(none) [[A:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.22", align 2 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: store i16 [[B]], ptr [[REF_TMP_I]], align 2, !tbaa [[TBAA37]], !noalias [[META47]] +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK: arrayinit.body.i.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 2, !tbaa [[TBAA37]], !noalias [[META47]] +// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 2 +// CHECK-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 16 +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYISLM8EEC2ERKS_EXIT_I:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: store i16 [[B]], ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA37]], !noalias [[META47]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIsLm8EEC2ERKs.exit.i: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYISLM8EEC2ERKS_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLISEENST9ENABLE_IFIXAASR3STDE16IS_CONVERTIBLE_VIST_EOOSR3STDE16IS_FUNDAMENTAL_VIS3_ESR3STDE9IS_SAME_VINST12REMOVE_CONSTIS3_E4TYPEENS0_6DETAIL9HALF_IMPL4HALFEEENS0_6MARRAYISLM8EEEE4TYPEERKSB_RKS3__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I2_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I2_I]], align 2, !tbaa [[TBAA37]], !noalias [[META54:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA37]], !noalias [[META54]] +// CHECK-NEXT: [[ADD_I_I:%.*]] = add i16 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I10_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[ADD_I_I]], ptr addrspace(4) [[ARRAYIDX_I10_I_I]], align 2, !tbaa [[TBAA37]], !alias.scope [[META54]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK: _ZN4sycl3_V1plIsEENSt9enable_ifIXaasr3stdE16is_convertible_vIsT_Eoosr3stdE16is_fundamental_vIS3_Esr3stdE9is_same_vINSt12remove_constIS3_E4typeENS0_6detail9half_impl4halfEEENS0_6marrayIsLm8EEEE4typeERKSB_RKS3_.exit: +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z9TestAddSRN4sycl3_V16marrayIsLm8EEEs( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.25") align 16 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.25") align 16 captures(none) [[A:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.25", align 16 +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: store i16 [[B]], ptr [[REF_TMP_I]], align 2, !tbaa [[TBAA36]], !noalias [[META46]] +// CHECK-PREVIEW-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK-PREVIEW: arrayinit.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-PREVIEW-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 2, !tbaa [[TBAA36]], !noalias [[META46]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 2 +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK-PREVIEW: for.cond.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYISLM8EEC2ERKS_EXIT_I:%.*]] +// CHECK-PREVIEW: for.body.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: store i16 [[B]], ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA36]], !noalias [[META46]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIsLm8EEC2ERKs.exit.i: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYISLM8EEC2ERKS_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLISEENST9ENABLE_IFIXAASR3STDE16IS_CONVERTIBLE_VIST_EOOSR3STDE16IS_FUNDAMENTAL_VIS3_ESR3STDE9IS_SAME_VINST12REMOVE_CONSTIS3_E4TYPEENS0_6DETAIL9HALF_IMPL4HALFEEENS0_6MARRAYISLM8EEEE4TYPEERKSB_RKS3__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I2_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I2_I]], align 2, !tbaa [[TBAA36]], !noalias [[META53:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA36]], !noalias [[META53]] +// CHECK-PREVIEW-NEXT: [[ADD_I_I:%.*]] = add i16 [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I10_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store i16 [[ADD_I_I]], ptr addrspace(4) [[ARRAYIDX_I10_I_I]], align 2, !tbaa [[TBAA36]], !alias.scope [[META53]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP54:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1plIsEENSt9enable_ifIXaasr3stdE16is_convertible_vIsT_Eoosr3stdE16is_fundamental_vIS3_Esr3stdE9is_same_vINSt12remove_constIS3_E4typeENS0_6detail9half_impl4halfEEENS0_6marrayIsLm8EEEE4typeERKSB_RKS3_.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAddSR(marray a, std::int16_t b) { + return a + b; +} + +// Floating-point arithmetic (% is excluded for float/double/half). +// CHECK-LABEL: define dso_local spir_func void @_Z8TestAddFN4sycl3_V16marrayIdLm16EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.23") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.23") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.23") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIDLM16EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA35]], !noalias [[META56]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA35]], !noalias [[META56]] +// CHECK-NEXT: [[ADD_I:%.*]] = fadd double [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store double [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA35]], !alias.scope [[META56]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP59:![0-9]+]] +// CHECK: _ZN4sycl3_V1plERKNS0_6marrayIdLm16EEES4_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z8TestAddFN4sycl3_V16marrayIdLm16EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.26") align 64 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.26") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.26") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIDLM16EEES4__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA34]], !noalias [[META55]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA34]], !noalias [[META55]] +// CHECK-PREVIEW-NEXT: [[ADD_I:%.*]] = fadd double [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store double [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA34]], !alias.scope [[META55]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1plERKNS0_6marrayIdLm16EEES4_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAddF(marray a, marray b) { + return a + b; +} + +// Padded size-3 arithmetic to compare alloca/byval alignment. +// CHECK-LABEL: define dso_local spir_func void @_Z9TestAddF3N4sycl3_V16marrayINS0_6detail9half_impl4halfELm3EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.24") align 2 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYINS0_6DETAIL9HALF_IMPL4HALFELM3EEES7__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA29]], !noalias [[META60]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA29]], !noalias [[META60]] +// CHECK-NEXT: [[ADD_I_I_I:%.*]] = fadd half [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[ARRAYIDX_I11_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store half [[ADD_I_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I]], align 2, !tbaa [[TBAA29]], !alias.scope [[META60]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP63:![0-9]+]] +// CHECK: _ZN4sycl3_V1plERKNS0_6marrayINS0_6detail9half_impl4halfELm3EEES7_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z9TestAddF3N4sycl3_V16marrayINS0_6detail9half_impl4halfELm3EEES5_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.27") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.27") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.27") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYINS0_6DETAIL9HALF_IMPL4HALFELM3EEES7__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load half, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA28]], !noalias [[META59]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load half, ptr [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA28]], !noalias [[META59]] +// CHECK-PREVIEW-NEXT: [[ADD_I_I_I:%.*]] = fadd half [[TMP1]], [[TMP0]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I11_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store half [[ADD_I_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I]], align 2, !tbaa [[TBAA28]], !alias.scope [[META59]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1plERKNS0_6marrayINS0_6detail9half_impl4halfELm3EEES7_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAddF3(marray a, marray b) { + return a + b; +} + +/*************** Bitwise Ops (integer only) ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAndN4sycl3_V16marrayIjLm8EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.25") align 4 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.25") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.25") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META64:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ANIJNS0_6MARRAYIJLM8EEEEES3_RKS3_S5__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META64]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META64]] +// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store i32 [[AND_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META64]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP67:![0-9]+]] +// CHECK: _ZN4sycl3_V1anIjNS0_6marrayIjLm8EEEEES3_RKS3_S5_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z7TestAndN4sycl3_V16marrayIjLm8EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.29") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.29") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.29") align 32 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ANIJNS0_6MARRAYIJLM8EEEEES3_RKS3_S5__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META63]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META63]] +// CHECK-PREVIEW-NEXT: [[AND_I:%.*]] = and i32 [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i32 [[AND_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META63]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP66:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1anIjNS0_6marrayIjLm8EEEEES3_RKS3_S5_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAnd(marray a, + marray b) { + return a & b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z9TestAndSRN4sycl3_V16marrayIaLm16EEEa( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.26") align 1 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.26") align 1 captures(none) [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.26", align 1 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: store i8 [[B]], ptr [[REF_TMP_I]], align 1, !tbaa [[TBAA22]], !noalias [[META68]] +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK: arrayinit.body.i.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-NEXT: store i8 0, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META68]] +// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 1 +// CHECK-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 16 +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIALM16EEC2ERKA_EXIT_I:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: store i8 [[B]], ptr [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META68]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP71:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIaLm16EEC2ERKa.exit.i: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIALM16EEC2ERKA_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1ANIAAEENST9ENABLE_IFIXAAAASR3STDE16IS_CONVERTIBLE_VIT_AESR3STDE13IS_INTEGRAL_VIS3_ESR3STDE13IS_INTEGRAL_VIT0_EENS0_6MARRAYIALM16EEEE4TYPEERKS6_RKS3__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I2_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I2_I]], align 1, !tbaa [[TBAA22]], !noalias [[META75:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX_I10_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I10_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META75]] +// CHECK-NEXT: [[AND9_I_I:%.*]] = and i8 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I11_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i8 [[AND9_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I_I]], align 1, !tbaa [[TBAA22]], !alias.scope [[META75]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP76:![0-9]+]] +// CHECK: _ZN4sycl3_V1anIaaEENSt9enable_ifIXaaaasr3stdE16is_convertible_vIT_aEsr3stdE13is_integral_vIS3_Esr3stdE13is_integral_vIT0_EENS0_6marrayIaLm16EEEE4typeERKS6_RKS3_.exit: +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z9TestAndSRN4sycl3_V16marrayIaLm16EEEa( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.30") align 16 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.30") align 16 captures(none) [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.30", align 16 +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: store i8 [[B]], ptr [[REF_TMP_I]], align 1, !tbaa [[TBAA22]], !noalias [[META67]] +// CHECK-PREVIEW-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK-PREVIEW: arrayinit.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-PREVIEW-NEXT: store i8 0, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META67]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 1 +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK-PREVIEW: for.cond.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIALM16EEC2ERKA_EXIT_I:%.*]] +// CHECK-PREVIEW: for.body.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: store i8 [[B]], ptr [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META67]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP70:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIaLm16EEC2ERKa.exit.i: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIALM16EEC2ERKA_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1ANIAAEENST9ENABLE_IFIXAAAASR3STDE16IS_CONVERTIBLE_VIT_AESR3STDE13IS_INTEGRAL_VIS3_ESR3STDE13IS_INTEGRAL_VIT0_EENS0_6MARRAYIALM16EEEE4TYPEERKS6_RKS3__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I2_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I2_I]], align 1, !tbaa [[TBAA22]], !noalias [[META74:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I10_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I10_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META74]] +// CHECK-PREVIEW-NEXT: [[AND9_I_I:%.*]] = and i8 [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I11_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store i8 [[AND9_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I_I]], align 1, !tbaa [[TBAA22]], !alias.scope [[META74]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP75:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1anIaaEENSt9enable_ifIXaaaasr3stdE16is_convertible_vIT_aEsr3stdE13is_integral_vIS3_Esr3stdE13is_integral_vIT0_EENS0_6marrayIaLm16EEEE4typeERKS6_RKS3_.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestAndSR(marray a, std::int8_t b) { + return a & b; +} + +/*************** Relational Ops (return marray) ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z6TestEqN4sycl3_V16marrayIfLm4EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.27") align 1 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.19") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.19") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1EQERKNS0_6MARRAYIFLM4EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA31]], !noalias [[META77]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA31]], !noalias [[META77]] +// CHECK-NEXT: [[CMP2_I:%.*]] = fcmp oeq float [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[CMP2_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA80:![0-9]+]], !alias.scope [[META77]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP82:![0-9]+]] +// CHECK: _ZN4sycl3_V1eqERKNS0_6marrayIfLm4EEES4_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z6TestEqN4sycl3_V16marrayIfLm4EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.31") align 4 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1EQERKNS0_6MARRAYIFLM4EEES4__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA30]], !noalias [[META76]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA30]], !noalias [[META76]] +// CHECK-PREVIEW-NEXT: [[CMP2_I:%.*]] = fcmp oeq float [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[STOREDV_I:%.*]] = zext i1 [[CMP2_I]] to i8 +// CHECK-PREVIEW-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA79:![0-9]+]], !alias.scope [[META76]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP81:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1eqERKNS0_6marrayIfLm4EEES4_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestEq(marray a, marray b) { + return a == b; +} +// CHECK-LABEL: define dso_local spir_func void @_Z8TestEqSRN4sycl3_V16marrayIdLm8EEEd( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.28") align 1 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.17") align 8 captures(none) [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.17", align 8 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 56, i1 false), !noalias [[META83]] +// CHECK-NEXT: store double [[B]], ptr [[REF_TMP_I]], align 8, !tbaa [[TBAA35]], !noalias [[META83]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIDLM8EEC2ERKD_EXIT_I:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: store double [[B]], ptr [[ARRAYIDX_I_I_I]], align 8, !tbaa [[TBAA35]], !noalias [[META83]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP86:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIdLm8EEC2ERKd.exit.i: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIDLM8EEC2ERKD_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1EQIDEENST9ENABLE_IFIXAASR3STDE16IS_CONVERTIBLE_VIT_DEOOSR3STDE16IS_FUNDAMENTAL_VIS3_ESR3STDE9IS_SAME_VIS3_NS0_6DETAIL9HALF_IMPL4HALFEEENS0_6MARRAYIBLM8EEEE4TYPEERKNS7_IDLM8EEERKS3__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I1_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I_I1_I]], align 8, !tbaa [[TBAA35]], !noalias [[META90:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX_I8_I_I]], align 8, !tbaa [[TBAA35]], !noalias [[META90]] +// CHECK-NEXT: [[CMP2_I_I:%.*]] = fcmp oeq double [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[STOREDV_I_I:%.*]] = zext i1 [[CMP2_I_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I_I]], ptr addrspace(4) [[ARRAYIDX_I9_I_I]], align 1, !tbaa [[TBAA80]], !alias.scope [[META90]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP91:![0-9]+]] +// CHECK: _ZN4sycl3_V1eqIdEENSt9enable_ifIXaasr3stdE16is_convertible_vIT_dEoosr3stdE16is_fundamental_vIS3_Esr3stdE9is_same_vIS3_NS0_6detail9half_impl4halfEEENS0_6marrayIbLm8EEEE4typeERKNS7_IdLm8EEERKS3_.exit: +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z8TestEqSRN4sycl3_V16marrayIdLm8EEEd( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.19") align 64 captures(none) [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.19", align 64 +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.memset.p0.i64(ptr align 64 [[REF_TMP_I]], i8 0, i64 64, i1 false), !noalias [[META82]] +// CHECK-PREVIEW-NEXT: store double [[B]], ptr [[REF_TMP_I]], align 8, !tbaa [[TBAA34]], !noalias [[META82]] +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIDLM8EEC2ERKD_EXIT_I:%.*]] +// CHECK-PREVIEW: for.body.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: store double [[B]], ptr [[ARRAYIDX_I_I_I]], align 8, !tbaa [[TBAA34]], !noalias [[META82]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP85:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIdLm8EEC2ERKd.exit.i: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIDLM8EEC2ERKD_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1EQIDEENST9ENABLE_IFIXAASR3STDE16IS_CONVERTIBLE_VIT_DEOOSR3STDE16IS_FUNDAMENTAL_VIS3_ESR3STDE9IS_SAME_VIS3_NS0_6DETAIL9HALF_IMPL4HALFEEENS0_6MARRAYIBLM8EEEE4TYPEERKNS7_IDLM8EEERKS3__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I1_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I1_I]], align 8, !tbaa [[TBAA34]], !noalias [[META89:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I8_I_I]], align 8, !tbaa [[TBAA34]], !noalias [[META89]] +// CHECK-PREVIEW-NEXT: [[CMP2_I_I:%.*]] = fcmp oeq double [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[STOREDV_I_I:%.*]] = zext i1 [[CMP2_I_I]] to i8 +// CHECK-PREVIEW-NEXT: store i8 [[STOREDV_I_I]], ptr addrspace(4) [[ARRAYIDX_I9_I_I]], align 1, !tbaa [[TBAA79]], !alias.scope [[META89]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1eqIdEENSt9enable_ifIXaasr3stdE16is_convertible_vIT_dEoosr3stdE16is_fundamental_vIS3_Esr3stdE9is_same_vIS3_NS0_6detail9half_impl4halfEEENS0_6marrayIbLm8EEEE4typeERKNS7_IdLm8EEERKS3_.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestEqSR(marray a, double b) { return a == b; } + +/*************** Logical Ops (return marray) ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z8TestLAndN4sycl3_V16marrayIlLm4EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.27") align 1 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.21") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYILLM4EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA44]], !noalias [[META92]] +// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i64 [[TMP0]], 0 +// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LAND_END_I]], label [[LAND_RHS_I:%.*]] +// CHECK: land.rhs.i: +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA44]], !noalias [[META92]] +// CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i64 [[TMP1]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 +// CHECK-NEXT: br label [[LAND_END_I]] +// CHECK: land.end.i: +// CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, [[FOR_BODY_I]] ], [ [[TMP2]], [[LAND_RHS_I]] ] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA80]], !alias.scope [[META92]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP95:![0-9]+]] +// CHECK: _ZN4sycl3_V1aaERKNS0_6marrayIlLm4EEES4_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z8TestLAndN4sycl3_V16marrayIlLm4EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.31") align 4 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.24") align 32 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYILLM4EEES4__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA43]], !noalias [[META91]] +// CHECK-PREVIEW-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i64 [[TMP0]], 0 +// CHECK-PREVIEW-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LAND_END_I]], label [[LAND_RHS_I:%.*]] +// CHECK-PREVIEW: land.rhs.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA43]], !noalias [[META91]] +// CHECK-PREVIEW-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i64 [[TMP1]], 0 +// CHECK-PREVIEW-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 +// CHECK-PREVIEW-NEXT: br label [[LAND_END_I]] +// CHECK-PREVIEW: land.end.i: +// CHECK-PREVIEW-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, [[FOR_BODY_I]] ], [ [[TMP2]], [[LAND_RHS_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA79]], !alias.scope [[META91]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP94:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1aaERKNS0_6marrayIlLm4EEES4_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestLAnd(marray a, + marray b) { + return a && b; +} +// CHECK-LABEL: define dso_local spir_func void @_Z7TestLOrN4sycl3_V16marrayIaLm16EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.29") align 1 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.26") align 1 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.26") align 1 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LOR_END_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1OOERKNS0_6MARRAYIALM16EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META96]] +// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 +// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LOR_RHS_I:%.*]], label [[LOR_END_I]] +// CHECK: lor.rhs.i: +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I8_I]], align 1, !tbaa [[TBAA22]], !noalias [[META96]] +// CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i8 [[TMP1]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 +// CHECK-NEXT: br label [[LOR_END_I]] +// CHECK: lor.end.i: +// CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 1, [[FOR_BODY_I]] ], [ [[TMP2]], [[LOR_RHS_I]] ] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA80]], !alias.scope [[META96]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP99:![0-9]+]] +// CHECK: _ZN4sycl3_V1ooERKNS0_6marrayIaLm16EEES4_.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z7TestLOrN4sycl3_V16marrayIaLm16EEES2_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.33") align 16 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.30") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.30") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LOR_END_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1OOERKNS0_6MARRAYIALM16EEES4__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA22]], !noalias [[META95]] +// CHECK-PREVIEW-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 +// CHECK-PREVIEW-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LOR_RHS_I:%.*]], label [[LOR_END_I]] +// CHECK-PREVIEW: lor.rhs.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I8_I]], align 1, !tbaa [[TBAA22]], !noalias [[META95]] +// CHECK-PREVIEW-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i8 [[TMP1]], 0 +// CHECK-PREVIEW-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 +// CHECK-PREVIEW-NEXT: br label [[LOR_END_I]] +// CHECK-PREVIEW: lor.end.i: +// CHECK-PREVIEW-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 1, [[FOR_BODY_I]] ], [ [[TMP2]], [[LOR_RHS_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA79]], !alias.scope [[META95]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1ooERKNS0_6marrayIaLm16EEES4_.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestLOr(marray a, + marray b) { + return a || b; +} + +/*************** Compound Assignment Ops ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z13TestAddAssignRN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm8EEES5_( +// CHECK-SAME: ptr addrspace(4) noundef align 2 dereferenceable(16) [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::marray.30") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.30", align 2 +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLERNS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM8EEERKS5__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I_I]]), !noalias [[META100]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR13:[0-9]+]], !noalias [[META103:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I7_I_I]]) #[[ATTR13]], !noalias [[META103]] +// CHECK-NEXT: [[ADD_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I]], [[CALL_I_I2_I_I_I]] +// CHECK-NEXT: store float [[ADD_I_I_I]], ptr [[REF_TMP_I_I_I]], align 4, !tbaa [[TBAA31]], !noalias [[META103]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I]]) #[[ATTR13]], !noalias [[META103]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I_I]]), !noalias [[META100]] +// CHECK-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I]], ptr [[ARRAYIDX_I8_I_I]], align 2, !tbaa [[TBAA37]], !alias.scope [[META100]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK: _ZN4sycl3_V1pLERNS0_6marrayINS0_3ext6oneapi8bfloat16ELm8EEERKS5_.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 2 [[A]], ptr align 2 [[REF_TMP_I]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT39]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z13TestAddAssignRN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm8EEES5_( +// CHECK-PREVIEW-SAME: ptr addrspace(4) noundef align 16 dereferenceable(16) [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::marray.34") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I_I_I:%.*]] = alloca float, align 4 +// CHECK-PREVIEW-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.34", align 16 +// CHECK-PREVIEW-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) +// CHECK-PREVIEW-NEXT: [[REF_TMP_ASCAST_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I]] to ptr addrspace(4) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLERNS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM8EEERKS5__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I_I]]), !noalias [[META99]] +// CHECK-PREVIEW-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR13:[0-9]+]], !noalias [[META102:![0-9]+]] +// CHECK-PREVIEW-NEXT: [[CALL_I_I2_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I7_I_I]]) #[[ATTR13]], !noalias [[META102]] +// CHECK-PREVIEW-NEXT: [[ADD_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I]], [[CALL_I_I2_I_I_I]] +// CHECK-PREVIEW-NEXT: store float [[ADD_I_I_I]], ptr [[REF_TMP_I_I_I]], align 4, !tbaa [[TBAA30]], !noalias [[META102]] +// CHECK-PREVIEW-NEXT: [[CALL_I_I3_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I]]) #[[ATTR13]], !noalias [[META102]] +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I_I]]), !noalias [[META99]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store i16 [[CALL_I_I3_I_I_I]], ptr [[ARRAYIDX_I8_I_I]], align 2, !tbaa [[TBAA36]], !alias.scope [[META99]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP105:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1pLERNS0_6marrayINS0_3ext6oneapi8bfloat16ELm8EEERKS5_.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 16 [[A]], ptr align 16 [[REF_TMP_I]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT38]] +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL void TestAddAssign(marray &a, marray b) { + a += b; +} + +// Scalar-rhs compound forms. +// CHECK-LABEL: define dso_local spir_func void @_Z14TestAddAssignSRN4sycl3_V16marrayIdLm3EEEd( +// CHECK-SAME: ptr addrspace(4) noundef align 8 captures(none) dereferenceable(24) [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.31", align 8 +// CHECK-NEXT: [[REF_TMP1_I:%.*]] = alloca %"class.sycl::_V1::marray.31", align 8 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP1_I]]) +// CHECK-NEXT: store double [[B]], ptr [[REF_TMP1_I]], align 8, !tbaa [[TBAA35]] +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK: arrayinit.body.i.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP1_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-NEXT: store double 0.000000e+00, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 8, !tbaa [[TBAA35]] +// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 8 +// CHECK-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 24 +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIDLM3EEC2ERKD_EXIT_I:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: store double [[B]], ptr [[ARRAYIDX_I_I_I]], align 8, !tbaa [[TBAA35]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP107:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIdLm3EEC2ERKd.exit.i: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIDLM3EEC2ERKD_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLILM3EEENST9ENABLE_IFIXNET_LI1EERNS0_6MARRAYIDLM3EEEE4TYPEES5_RKD_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I4_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[ARRAYIDX_I_I4_I]], align 8, !tbaa [[TBAA35]], !noalias [[META108]] +// CHECK-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I_I]], align 8, !tbaa [[TBAA35]], !noalias [[META108]] +// CHECK-NEXT: [[ADD_I_I:%.*]] = fadd double [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: store double [[ADD_I_I]], ptr [[ARRAYIDX_I8_I_I]], align 8, !tbaa [[TBAA35]], !alias.scope [[META108]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP111:![0-9]+]] +// CHECK: _ZN4sycl3_V1pLILm3EEENSt9enable_ifIXneT_Li1EERNS0_6marrayIdLm3EEEE4typeES5_RKd.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[A]], ptr align 8 [[REF_TMP_I]], i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT112:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP1_I]]) +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z14TestAddAssignSRN4sycl3_V16marrayIdLm3EEEd( +// CHECK-PREVIEW-SAME: ptr addrspace(4) noundef align 32 captures(none) dereferenceable(24) [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::marray.35", align 32 +// CHECK-PREVIEW-NEXT: [[REF_TMP1_I:%.*]] = alloca %"class.sycl::_V1::marray.35", align 32 +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP1_I]]) +// CHECK-PREVIEW-NEXT: store double [[B]], ptr [[REF_TMP1_I]], align 8, !tbaa [[TBAA34]] +// CHECK-PREVIEW-NEXT: br label [[ARRAYINIT_BODY_I_I:%.*]] +// CHECK-PREVIEW: arrayinit.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_IDX_I_I:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I:%.*]], [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP1_I]], i64 [[ARRAYINIT_CUR_IDX_I_I]] +// CHECK-PREVIEW-NEXT: store double 0.000000e+00, ptr [[ARRAYINIT_CUR_PTR_I_I]], align 8, !tbaa [[TBAA34]] +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_CUR_ADD_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I]], 8 +// CHECK-PREVIEW-NEXT: [[ARRAYINIT_DONE_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I]], 24 +// CHECK-PREVIEW-NEXT: br i1 [[ARRAYINIT_DONE_I_I]], label [[FOR_COND_I_I_I:%.*]], label [[ARRAYINIT_BODY_I_I]] +// CHECK-PREVIEW: for.cond.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V16MARRAYIDLM3EEC2ERKD_EXIT_I:%.*]] +// CHECK-PREVIEW: for.body.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: store double [[B]], ptr [[ARRAYIDX_I_I_I]], align 8, !tbaa [[TBAA34]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIdLm3EEC2ERKd.exit.i: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYIDLM3EEC2ERKD_EXIT_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1PLILM3EEENST9ENABLE_IFIXNET_LI1EERNS0_6MARRAYIDLM3EEEE4TYPEES5_RKD_EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I4_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[ARRAYIDX_I_I4_I]], align 8, !tbaa [[TBAA34]], !noalias [[META107]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I_I]], align 8, !tbaa [[TBAA34]], !noalias [[META107]] +// CHECK-PREVIEW-NEXT: [[ADD_I_I:%.*]] = fadd double [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I]], i64 [[I_0_I_I]] +// CHECK-PREVIEW-NEXT: store double [[ADD_I_I]], ptr [[ARRAYIDX_I8_I_I]], align 8, !tbaa [[TBAA34]], !alias.scope [[META107]] +// CHECK-PREVIEW-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP110:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1pLILm3EEENSt9enable_ifIXneT_Li1EERNS0_6marrayIdLm3EEEE4typeES5_RKd.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[A]], ptr align 32 [[REF_TMP_I]], i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT111:![0-9]+]] +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP1_I]]) +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL void TestAddAssignS(marray &a, double b) { a += b; } + +/*************** Unary Ops ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z13TestUnaryPlusN4sycl3_V16marrayIsLm4EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.32") align 2 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.32") align 2 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PSERKNS0_6MARRAYISLM4EEE_EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA37]], !noalias [[META113]] +// CHECK-NEXT: [[ARRAYIDX_I6_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[TMP0]], ptr addrspace(4) [[ARRAYIDX_I6_I]], align 2, !tbaa [[TBAA37]], !alias.scope [[META113]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP116:![0-9]+]] +// CHECK: _ZN4sycl3_V1psERKNS0_6marrayIsLm4EEE.exit: +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z13TestUnaryPlusN4sycl3_V16marrayIsLm4EEE( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.37") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::marray.37") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK-PREVIEW: for.cond.i: +// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PSERKNS0_6MARRAYISLM4EEE_EXIT:%.*]] +// CHECK-PREVIEW: for.body.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA36]], !noalias [[META112]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I6_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]] +// CHECK-PREVIEW-NEXT: store i16 [[TMP0]], ptr addrspace(4) [[ARRAYIDX_I6_I]], align 2, !tbaa [[TBAA36]], !alias.scope [[META112]] +// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP115:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1psERKNS0_6marrayIsLm4EEE.exit: +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestUnaryPlus(marray a) { return +a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z10TestPreIncRN4sycl3_V16marrayIlLm8EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.33") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 8 captures(none) dereferenceable(64) [[A:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca %"class.sycl::_V1::marray.33", align 8 +// CHECK-NEXT: [[REF_TMP1_I_I:%.*]] = alloca %"class.sycl::_V1::marray.33", align 8 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP1_I_I]]) +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP1_I_I]], i64 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 56, i1 false) +// CHECK-NEXT: store i64 1, ptr [[REF_TMP1_I_I]], align 8, !tbaa [[TBAA44]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I_I:%.*]] +// CHECK: for.cond.i.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I_I:%.*]], [[FOR_BODY_I_I_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I_I_I]], label [[FOR_BODY_I_I_I_I]], label [[_ZN4SYCL3_V16MARRAYILLM8EEC2ERKL_EXIT_I_I:%.*]] +// CHECK: for.body.i.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I_I]], i64 [[I_0_I_I_I_I]] +// CHECK-NEXT: store i64 1, ptr [[ARRAYIDX_I_I_I_I]], align 8, !tbaa [[TBAA44]] +// CHECK-NEXT: [[INC_I_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I_I]], !llvm.loop [[LOOP117:![0-9]+]] +// CHECK: _ZN4sycl3_V16marrayIlLm8EEC2ERKl.exit.i.i: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYILLM8EEC2ERKL_EXIT_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1PPILEERNST9ENABLE_IFIXNTSR3STDE9IS_SAME_VINST9REMOVE_CVIT_E4TYPEEBEENS0_6MARRAYILLM8EEEE4TYPEERS8__EXIT:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I4_I_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX_I_I4_I_I]], align 8, !tbaa [[TBAA44]], !noalias [[META118]] +// CHECK-NEXT: [[ARRAYIDX_I7_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX_I7_I_I_I]], align 8, !tbaa [[TBAA44]], !noalias [[META118]] +// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add nsw i64 [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[ARRAYIDX_I8_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I_I]], i64 [[I_0_I_I_I]] +// CHECK-NEXT: store i64 [[ADD_I_I_I]], ptr [[ARRAYIDX_I8_I_I_I]], align 8, !tbaa [[TBAA44]], !alias.scope [[META118]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP121:![0-9]+]] +// CHECK: _ZN4sycl3_V1ppIlEERNSt9enable_ifIXntsr3stdE9is_same_vINSt9remove_cvIT_E4typeEbEENS0_6marrayIlLm8EEEE4typeERS8_.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[A]], ptr align 8 [[REF_TMP_I_I]], i64 64, i1 false), !tbaa.struct [[TBAA_STRUCT122:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I]]) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP1_I_I]]) +// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 8 dereferenceable(64) [[AGG_RESULT]], ptr addrspace(4) noundef align 8 dereferenceable(64) [[A]], i64 64, i1 false), !tbaa.struct [[TBAA_STRUCT122]] +// CHECK-NEXT: ret void +// +// CHECK-PREVIEW-LABEL: define dso_local spir_func void @_Z10TestPreIncRN4sycl3_V16marrayIlLm8EEE( +// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::marray.38") align 64 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 64 captures(none) dereferenceable(64) [[A:%.*]]) local_unnamed_addr #[[ATTR7]] {{.*}}{ +// CHECK-PREVIEW-NEXT: entry: +// CHECK-PREVIEW-NEXT: [[REF_TMP_I_I:%.*]] = alloca %"class.sycl::_V1::marray.38", align 64 +// CHECK-PREVIEW-NEXT: [[REF_TMP1_I_I:%.*]] = alloca %"class.sycl::_V1::marray.38", align 64 +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP1_I_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.memset.p0.i64(ptr align 64 [[REF_TMP1_I_I]], i8 0, i64 64, i1 false) +// CHECK-PREVIEW-NEXT: store i64 1, ptr [[REF_TMP1_I_I]], align 8, !tbaa [[TBAA43]] +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I_I:%.*]], [[FOR_BODY_I_I_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I_I]], label [[FOR_BODY_I_I_I_I]], label [[_ZN4SYCL3_V16MARRAYILLM8EEC2ERKL_EXIT_I_I:%.*]] +// CHECK-PREVIEW: for.body.i.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I_I]], i64 [[I_0_I_I_I_I]] +// CHECK-PREVIEW-NEXT: store i64 1, ptr [[ARRAYIDX_I_I_I_I]], align 8, !tbaa [[TBAA43]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I_I]], !llvm.loop [[LOOP116:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V16marrayIlLm8EEC2ERKl.exit.i.i: +// CHECK-PREVIEW-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-PREVIEW: for.cond.i.i.i: +// CHECK-PREVIEW-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[_ZN4SYCL3_V16MARRAYILLM8EEC2ERKL_EXIT_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-PREVIEW-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I_I]], 8 +// CHECK-PREVIEW-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1PPILEERNST9ENABLE_IFIXNTSR3STDE9IS_SAME_VINST9REMOVE_CVIT_E4TYPEEBEENS0_6MARRAYILLM8EEEE4TYPEERS8__EXIT:%.*]] +// CHECK-PREVIEW: for.body.i.i.i: +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I_I4_I_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[A]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX_I_I4_I_I]], align 8, !tbaa [[TBAA43]], !noalias [[META117]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I7_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP1_I_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I_I_I]], align 8, !tbaa [[TBAA43]], !noalias [[META117]] +// CHECK-PREVIEW-NEXT: [[ADD_I_I_I:%.*]] = add nsw i64 [[TMP0]], [[TMP1]] +// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I8_I_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[REF_TMP_I_I]], i64 [[I_0_I_I_I]] +// CHECK-PREVIEW-NEXT: store i64 [[ADD_I_I_I]], ptr [[ARRAYIDX_I8_I_I_I]], align 8, !tbaa [[TBAA43]], !alias.scope [[META117]] +// CHECK-PREVIEW-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 +// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP120:![0-9]+]] +// CHECK-PREVIEW: _ZN4sycl3_V1ppIlEERNSt9enable_ifIXntsr3stdE9is_same_vINSt9remove_cvIT_E4typeEbEENS0_6marrayIlLm8EEEE4typeERS8_.exit: +// CHECK-PREVIEW-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 64 [[A]], ptr align 64 [[REF_TMP_I_I]], i64 64, i1 false), !tbaa.struct [[TBAA_STRUCT121:![0-9]+]] +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I]]) +// CHECK-PREVIEW-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP1_I_I]]) +// CHECK-PREVIEW-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 64 dereferenceable(64) [[AGG_RESULT]], ptr addrspace(4) noundef align 64 dereferenceable(64) [[A]], i64 64, i1 false), !tbaa.struct [[TBAA_STRUCT121]] +// CHECK-PREVIEW-NEXT: ret void +// +SYCL_EXTERNAL auto TestPreInc(marray &a) { return ++a; }