diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index c4eb7b7cac1d6..007cf80223dec 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -9,6 +9,8 @@ #ifndef LLVM_CLANG_BASIC_CUDA_H #define LLVM_CLANG_BASIC_CUDA_H +#include "clang/Basic/Offloading.h" + namespace llvm { class StringRef; class Twine; @@ -54,98 +56,6 @@ const char *CudaVersionToString(CudaVersion V); // Input is "Major.Minor" CudaVersion CudaStringToVersion(const llvm::Twine &S); -enum class OffloadArch { - UNUSED, - UNKNOWN, - // TODO: Deprecate and remove GPU architectures older than sm_52. - SM_20, - SM_21, - SM_30, - // This has a name conflict with sys/mac.h on AIX, rename it as a workaround. - SM_32_, - SM_35, - SM_37, - SM_50, - SM_52, - SM_53, - SM_60, - SM_61, - SM_62, - SM_70, - SM_72, - SM_75, - SM_80, - SM_86, - SM_87, - SM_89, - SM_90, - SM_90a, - SM_100, - SM_100a, - SM_101, - SM_101a, - SM_120, - SM_120a, - GFX600, - GFX601, - GFX602, - GFX700, - GFX701, - GFX702, - GFX703, - GFX704, - GFX705, - GFX801, - GFX802, - GFX803, - GFX805, - GFX810, - GFX9_GENERIC, - GFX900, - GFX902, - GFX904, - GFX906, - GFX908, - GFX909, - GFX90a, - GFX90c, - GFX9_4_GENERIC, - GFX942, - GFX950, - GFX10_1_GENERIC, - GFX1010, - GFX1011, - GFX1012, - GFX1013, - GFX10_3_GENERIC, - GFX1030, - GFX1031, - GFX1032, - GFX1033, - GFX1034, - GFX1035, - GFX1036, - GFX11_GENERIC, - GFX1100, - GFX1101, - GFX1102, - GFX1103, - GFX1150, - GFX1151, - GFX1152, - GFX1153, - GFX12_GENERIC, - GFX1200, - GFX1201, - AMDGCNSPIRV, - Generic, // A processor model named 'generic' if the target backend defines a - // public one. 
- LAST, - - CudaDefault = OffloadArch::SM_52, - HIPDefault = OffloadArch::GFX906, -}; - enum class CUDAFunctionTarget { Device, Global, @@ -154,21 +64,6 @@ enum class CUDAFunctionTarget { InvalidTarget }; -static inline bool IsNVIDIAOffloadArch(OffloadArch A) { - return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600; -} - -static inline bool IsAMDOffloadArch(OffloadArch A) { - // Generic processor model is for testing only. - return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; -} - -const char *OffloadArchToString(OffloadArch A); -const char *OffloadArchToVirtualArchString(OffloadArch A); - -// The input should have the form "sm_20". -OffloadArch StringToOffloadArch(llvm::StringRef S); - /// Get the earliest CudaVersion that supports the given OffloadArch. CudaVersion MinVersionForOffloadArch(OffloadArch A); diff --git a/clang/include/clang/Basic/Offloading.h b/clang/include/clang/Basic/Offloading.h new file mode 100644 index 0000000000000..6e2337d6ae471 --- /dev/null +++ b/clang/include/clang/Basic/Offloading.h @@ -0,0 +1,196 @@ +//===--- Offloading.h - Utilities for offloading ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OFFLOADING_H +#define LLVM_CLANG_BASIC_OFFLOADING_H + +namespace llvm { +class StringRef; +} // namespace llvm + +namespace clang { + +enum class OffloadArch { + UNUSED, + UNKNOWN, + // TODO: Deprecate and remove GPU architectures older than sm_52. + SM_20, + SM_21, + SM_30, + // This has a name conflict with sys/mac.h on AIX, rename it as a workaround. 
+ SM_32_, + SM_35, + SM_37, + SM_50, + SM_52, + SM_53, + SM_60, + SM_61, + SM_62, + SM_70, + SM_72, + SM_75, + SM_80, + SM_86, + SM_87, + SM_89, + SM_90, + SM_90a, + SM_100, + SM_100a, + SM_101, + SM_101a, + SM_120, + SM_120a, + GFX600, + GFX601, + GFX602, + GFX700, + GFX701, + GFX702, + GFX703, + GFX704, + GFX705, + GFX801, + GFX802, + GFX803, + GFX805, + GFX810, + GFX9_GENERIC, + GFX900, + GFX902, + GFX904, + GFX906, + GFX908, + GFX909, + GFX90a, + GFX90c, + GFX9_4_GENERIC, + GFX942, + GFX950, + GFX10_1_GENERIC, + GFX1010, + GFX1011, + GFX1012, + GFX1013, + GFX10_3_GENERIC, + GFX1030, + GFX1031, + GFX1032, + GFX1033, + GFX1034, + GFX1035, + GFX1036, + GFX11_GENERIC, + GFX1100, + GFX1101, + GFX1102, + GFX1103, + GFX1150, + GFX1151, + GFX1152, + GFX1153, + GFX12_GENERIC, + GFX1200, + GFX1201, + AMDGCNSPIRV, + Generic, // A processor model named 'generic' if the target backend defines a + // public one. + // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, + LAST, + + CudaDefault = OffloadArch::SM_52, + HIPDefault = OffloadArch::GFX906, +}; + +static inline bool IsNVIDIAOffloadArch(OffloadArch A) { + return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600; +} + +static inline bool IsAMDOffloadArch(OffloadArch A) { + // Generic processor model is for testing only. 
+ return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; +} + +static inline bool IsIntelOffloadArch(OffloadArch Arch) { + return Arch >= OffloadArch::SKYLAKEAVX512 && Arch < OffloadArch::LAST; +} + +static inline bool IsIntelCPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SKYLAKEAVX512 && Arch < OffloadArch::BDW; +} + +static inline bool IsIntelGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::BDW && Arch < OffloadArch::LAST; +} + +const char *OffloadArchToString(OffloadArch A); +const char *OffloadArchToVirtualArchString(OffloadArch A); + +// The input should be an architecture name, e.g. "sm_20", "gfx906" or "pvc". +OffloadArch StringToOffloadArch(llvm::StringRef S); + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_OFFLOADING_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b67..5c91dc43df9b6 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -76,6 +76,7 @@ add_clang_library(clangBasic MakeSupport.cpp Module.cpp ObjCRuntime.cpp + Offloading.cpp OpenCLOptions.cpp OpenMPKinds.cpp OperatorPrecedence.cpp diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 68d042eca2492..8ea242911a2ba 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -73,123 +73,6 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) { return CudaVersion::UNKNOWN; } -namespace { -struct OffloadArchToStringMap { - OffloadArch arch; - const char *arch_name; - const char *virtual_arch_name; -}; -} // namespace - -#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} -#define SM(sm) SM2(sm, "compute_" #sm) -#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} -static const OffloadArchToStringMap arch_names[] = { - // clang-format off - {OffloadArch::UNUSED, "", ""}, - SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi - SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler - SM(50), SM(52), SM(53), // Maxwell - SM(60), SM(61), SM(62), //
Pascal - SM(70), SM(72), // Volta - SM(75), // Turing - SM(80), SM(86), // Ampere - SM(87), // Jetson/Drive AGX Orin - SM(89), // Ada Lovelace - SM(90), // Hopper - SM(90a), // Hopper - SM(100), // Blackwell - SM(100a), // Blackwell - SM(101), // Blackwell - SM(101a), // Blackwell - SM(120), // Blackwell - SM(120a), // Blackwell - GFX(600), // gfx600 - GFX(601), // gfx601 - GFX(602), // gfx602 - GFX(700), // gfx700 - GFX(701), // gfx701 - GFX(702), // gfx702 - GFX(703), // gfx703 - GFX(704), // gfx704 - GFX(705), // gfx705 - GFX(801), // gfx801 - GFX(802), // gfx802 - GFX(803), // gfx803 - GFX(805), // gfx805 - GFX(810), // gfx810 - {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, - GFX(900), // gfx900 - GFX(902), // gfx902 - GFX(904), // gfx903 - GFX(906), // gfx906 - GFX(908), // gfx908 - GFX(909), // gfx909 - GFX(90a), // gfx90a - GFX(90c), // gfx90c - {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, - GFX(942), // gfx942 - GFX(950), // gfx950 - {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, - GFX(1010), // gfx1010 - GFX(1011), // gfx1011 - GFX(1012), // gfx1012 - GFX(1013), // gfx1013 - {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, - GFX(1030), // gfx1030 - GFX(1031), // gfx1031 - GFX(1032), // gfx1032 - GFX(1033), // gfx1033 - GFX(1034), // gfx1034 - GFX(1035), // gfx1035 - GFX(1036), // gfx1036 - {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, - GFX(1100), // gfx1100 - GFX(1101), // gfx1101 - GFX(1102), // gfx1102 - GFX(1103), // gfx1103 - GFX(1150), // gfx1150 - GFX(1151), // gfx1151 - GFX(1152), // gfx1152 - GFX(1153), // gfx1153 - {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, - GFX(1200), // gfx1200 - GFX(1201), // gfx1201 - {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, - {OffloadArch::Generic, "generic", ""}, - // clang-format on -}; -#undef SM -#undef SM2 -#undef GFX - -const char *OffloadArchToString(OffloadArch A) { - auto 
result = std::find_if( - std::begin(arch_names), std::end(arch_names), - [A](const OffloadArchToStringMap &map) { return A == map.arch; }); - if (result == std::end(arch_names)) - return "unknown"; - return result->arch_name; -} - -const char *OffloadArchToVirtualArchString(OffloadArch A) { - auto result = std::find_if( - std::begin(arch_names), std::end(arch_names), - [A](const OffloadArchToStringMap &map) { return A == map.arch; }); - if (result == std::end(arch_names)) - return "unknown"; - return result->virtual_arch_name; -} - -OffloadArch StringToOffloadArch(llvm::StringRef S) { - auto result = std::find_if( - std::begin(arch_names), std::end(arch_names), - [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); - if (result == std::end(arch_names)) - return OffloadArch::UNKNOWN; - return result->arch; -} - CudaVersion MinVersionForOffloadArch(OffloadArch A) { if (A == OffloadArch::UNKNOWN) return CudaVersion::UNKNOWN; diff --git a/clang/lib/Basic/Offloading.cpp b/clang/lib/Basic/Offloading.cpp new file mode 100644 index 0000000000000..63313a6777dd8 --- /dev/null +++ b/clang/lib/Basic/Offloading.cpp @@ -0,0 +1,185 @@ +#include "clang/Basic/Offloading.h" + +#include "llvm/ADT/StringRef.h" + +#include <algorithm> + +namespace clang { + +namespace { +struct OffloadArchToStringMap { + OffloadArch arch; + const char *arch_name; + const char *virtual_arch_name; +}; +} // namespace + +#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} +#define SM(sm) SM2(sm, "compute_" #sm) +#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} +#define INTEL(name, value) {OffloadArch::value, #name, ""} +static const OffloadArchToStringMap arch_names[] = { + // clang-format off + {OffloadArch::UNUSED, "", ""}, + SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi + SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler + SM(50), SM(52), SM(53), // Maxwell + SM(60), SM(61), SM(62), // Pascal + SM(70), SM(72), // Volta + SM(75),
// Turing + SM(80), SM(86), // Ampere + SM(87), // Jetson/Drive AGX Orin + SM(89), // Ada Lovelace + SM(90), // Hopper + SM(90a), // Hopper + SM(100), // Blackwell + SM(100a), // Blackwell + SM(101), // Blackwell + SM(101a), // Blackwell + SM(120), // Blackwell + SM(120a), // Blackwell + GFX(600), // gfx600 + GFX(601), // gfx601 + GFX(602), // gfx602 + GFX(700), // gfx700 + GFX(701), // gfx701 + GFX(702), // gfx702 + GFX(703), // gfx703 + GFX(704), // gfx704 + GFX(705), // gfx705 + GFX(801), // gfx801 + GFX(802), // gfx802 + GFX(803), // gfx803 + GFX(805), // gfx805 + GFX(810), // gfx810 + {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, + GFX(900), // gfx900 + GFX(902), // gfx902 + GFX(904), // gfx904 + GFX(906), // gfx906 + GFX(908), // gfx908 + GFX(909), // gfx909 + GFX(90a), // gfx90a + GFX(90c), // gfx90c + {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, + GFX(942), // gfx942 + GFX(950), // gfx950 + {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, + GFX(1010), // gfx1010 + GFX(1011), // gfx1011 + GFX(1012), // gfx1012 + GFX(1013), // gfx1013 + {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, + GFX(1030), // gfx1030 + GFX(1031), // gfx1031 + GFX(1032), // gfx1032 + GFX(1033), // gfx1033 + GFX(1034), // gfx1034 + GFX(1035), // gfx1035 + GFX(1036), // gfx1036 + {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, + GFX(1100), // gfx1100 + GFX(1101), // gfx1101 + GFX(1102), // gfx1102 + GFX(1103), // gfx1103 + GFX(1150), // gfx1150 + GFX(1151), // gfx1151 + GFX(1152), // gfx1152 + GFX(1153), // gfx1153 + {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, + GFX(1200), // gfx1200 + GFX(1201), // gfx1201 + {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, + // Intel CPUs + INTEL(skylake-avx512, SKYLAKEAVX512), + INTEL(core-avx2, COREAVX2), + INTEL(corei7-avx, COREI7AVX), + INTEL(corei7, COREI7), + INTEL(westmere, WESTMERE), + INTEL(sandybridge, SANDYBRIDGE),
+ INTEL(ivybridge, IVYBRIDGE), + INTEL(broadwell, BROADWELL), + INTEL(coffeelake, COFFEELAKE), + INTEL(alderlake, ALDERLAKE), + INTEL(skylake, SKYLAKE), + INTEL(skx, SKX), + INTEL(cascadelake, CASCADELAKE), + INTEL(icelake-client, ICELAKECLIENT), + INTEL(icelakeserver, ICELAKESERVER), + INTEL(sapphirerapids, SAPPHIRERAPIDS), + INTEL(graniterapids, GRANITERAPIDS), + // Intel GPUs + INTEL(bdw, BDW), + INTEL(skl, SKL), + INTEL(kbl, KBL), + INTEL(cfl, CFL), + INTEL(apl, APL), + INTEL(bxt, BXT), + INTEL(glk, GLK), + INTEL(whl, WHL), + INTEL(aml, AML), + INTEL(cml, CML), + INTEL(icllp, ICLLP), + INTEL(icl, ICL), + INTEL(ehl, EHL), + INTEL(jsl, JSL), + INTEL(tgllp, TGLLP), + INTEL(tgl, TGL), + INTEL(rkl, RKL), + INTEL(adl_s, ADL_S), + INTEL(rpl_s, RPL_S), + INTEL(adl_p, ADL_P), + INTEL(adl_n, ADL_N), + INTEL(dg1, DG1), + INTEL(acm_g10, ACM_G10), + INTEL(dg2_g10, DG2_G10), + INTEL(acm_g11, ACM_G11), + INTEL(dg2_g11, DG2_G11), + INTEL(acm_g12, ACM_G12), + INTEL(dg2_g12, DG2_G12), + INTEL(pvc, PVC), + INTEL(pvc_vg, PVC_VG), + INTEL(mtl_u, MTL_U), + INTEL(mtl_s, MTL_S), + INTEL(arl_u, ARL_U), + INTEL(arl_s, ARL_S), + INTEL(mtl_h, MTL_H), + INTEL(arl_h, ARL_H), + INTEL(bmg_g21, BMG_G21), + INTEL(lnl_m, LNL_M), + {OffloadArch::Generic, "generic", ""}, + // clang-format on +}; +#undef SM +#undef SM2 +#undef GFX +#undef INTEL + +const char *OffloadArchToString(OffloadArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const OffloadArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) + return "unknown"; + return result->arch_name; +} + +const char *OffloadArchToVirtualArchString(OffloadArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const OffloadArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) + return "unknown"; + return result->virtual_arch_name; +} + +OffloadArch StringToOffloadArch(llvm::StringRef S) { + auto result = 
std::find_if( + std::begin(arch_names), std::end(arch_names), + [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); + if (result == std::end(arch_names)) + return OffloadArch::UNKNOWN; + return result->arch; +} + +} // namespace clang diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 5931a77a85fec..4f04d83c9c068 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -240,6 +240,61 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case OffloadArch::PVC: + case OffloadArch::PVC_VG: + case OffloadArch::MTL_U: + case 
OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::LAST: break; case OffloadArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index feb2448297542..80990eeed7511 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2335,6 +2335,61 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case OffloadArch::PVC: + case OffloadArch::PVC_VG: + case 
OffloadArch::MTL_U: + case OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::UNUSED: case OffloadArch::UNKNOWN: break; diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index c399689653784..2ef7afaa69aac 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -20,7 +20,7 @@ // // Test a simple case with a random file (not bitcode) as input. // RUN: touch %t.o -// RUN: not clang-sycl-linker -triple spirv64 %t.o -o a.spv 2>&1 \ +// RUN: not clang-sycl-linker -triple=spirv64 %t.o -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=FILETYPEERROR // FILETYPEERROR: Unsupported file type // @@ -31,3 +31,43 @@ // RUN: not clang-sycl-linker --dry-run -triple=spirv64 %t_1.bc %t_2.bc --library-path=%T --device-libs=lib1.bc,lib2.bc,lib3.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=DEVLIBSERR2 // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found +// +// Test AOT compilation for an Intel GPU. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU +// AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-GPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv +// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel GPU with additional options. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --ocloc-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2 +// AOT-INTEL-GPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-GPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv +// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU +// AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-CPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv +// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU with additional options. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --opencl-aot-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2 +// AOT-INTEL-CPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-CPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv +// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Check that the output file must be specified. +// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \ +// RUN: | FileCheck %s --check-prefix=NOOUTPUT +// NOOUTPUT: Output file must be specified +// +// Check that the target triple must be specified.
+// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>& 1 \ +// RUN: | FileCheck %s --check-prefix=NOTARGET +// NOTARGET: Target triple must be specified diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index c640deddc9e74..6a9ef4c0fc42c 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,6 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// +#include "clang/Basic/Offloading.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -54,6 +55,7 @@ using namespace llvm; using namespace llvm::opt; using namespace llvm::object; +using namespace clang; /// Save intermediary results. static bool SaveTemps = false; @@ -70,6 +72,8 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +static bool IsAOTCompileNeeded = false; + static void printVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n'; } @@ -126,6 +130,12 @@ const OptTable &getOptTable() { exit(EXIT_FAILURE); } +std::string getMainExecutable(const char *Name) { + void *Ptr = (void *)(intptr_t)&getMainExecutable; + auto COWPath = sys::fs::getMainExecutable(Name, Ptr); + return sys::path::parent_path(COWPath).str(); +} + Expected createTempFile(const ArgList &Args, const Twine &Prefix, StringRef Extension) { SmallString<128> OutputFile; @@ -143,6 +153,40 @@ Expected createTempFile(const ArgList &Args, const Twine &Prefix, return TempFiles.back(); } +Expected findProgram(const ArgList &Args, StringRef Name, + ArrayRef Paths) { + if (Args.hasArg(OPT_dry_run)) + return Name.str(); + ErrorOr Path = sys::findProgramByName(Name, Paths); + if (!Path) + Path = sys::findProgramByName(Name); + if (!Path) + return createStringError(Path.getError(), + "Unable to find '" + Name + "' in
path"); + return *Path; +} + +void printCommands(ArrayRef CmdArgs) { + if (CmdArgs.empty()) + return; + + llvm::errs() << " \"" << CmdArgs.front() << "\" "; + llvm::errs() << llvm::join(std::next(CmdArgs.begin()), CmdArgs.end(), " ") + << "\n"; +} + +/// Execute the command \p ExecutablePath with the arguments \p Args. +Error executeCommands(StringRef ExecutablePath, ArrayRef Args) { + if (Verbose || DryRun) + printCommands(Args); + + if (!DryRun) + if (sys::ExecuteAndWait(ExecutablePath, Args)) + return createStringError( + "'%s' failed", sys::path::filename(ExecutablePath).str().c_str()); + return Error::success(); +} + Expected> getInput(const ArgList &Args) { // Collect all input bitcode files to be passed to the device linking stage. SmallVector BitcodeFiles; @@ -315,25 +359,109 @@ static Expected runSPIRVCodeGen(StringRef File, const ArgList &Args, M->setDataLayout(TM->createDataLayout()); // Open output file for writing. - int FD = -1; - if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD)) - return errorCodeToError(EC); - auto OS = std::make_unique(FD, true); + Expected OutFileOrErr = + IsAOTCompileNeeded + ? createTempFile(Args, sys::path::filename(OutputFile), "spv") + : OutputFile; + if (!OutFileOrErr) + return OutFileOrErr.takeError(); + std::error_code EC; + raw_fd_ostream OS(*OutFileOrErr, EC); + if (EC) + return createStringError(EC, "Could not open file " + *OutFileOrErr); // Run SPIR-V codegen passes to generate SPIR-V file. 
legacy::PassManager CodeGenPasses; TargetLibraryInfoImpl TLII(M->getTargetTriple()); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); - if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, + if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, CodeGenFileType::ObjectFile)) return createStringError("Failed to execute SPIR-V Backend"); CodeGenPasses.run(*M); if (Verbose) errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File, - OutputFile); + *OutFileOrErr); + + return *OutFileOrErr; +} + +/// Run AOT compilation for Intel CPU. +/// Calls opencl-aot tool to generate device code for Intel CPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Error runAOTCompileIntelCPU(StringRef InputFile, const ArgList &Args) { + SmallVector CmdArgs; + Expected OpenCLAOTPath = + findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")}); + if (!OpenCLAOTPath) + return OpenCLAOTPath.takeError(); + + CmdArgs.push_back(*OpenCLAOTPath); + CmdArgs.push_back("--device=cpu"); + StringRef ExtraArgs = Args.getLastArgValue(OPT_opencl_aot_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + CmdArgs.push_back("-o"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OpenCLAOTPath, CmdArgs)) + return Err; + return Error::success(); +} + +/// Run AOT compilation for Intel GPU +/// Calls ocloc tool to generate device code for Intel GPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. 
+static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { + SmallVector CmdArgs; + Expected OclocPath = + findProgram(Args, "ocloc", {getMainExecutable("ocloc")}); + if (!OclocPath) + return OclocPath.takeError(); + + CmdArgs.push_back(*OclocPath); + // The next line prevents ocloc from modifying the image name + CmdArgs.push_back("-output_no_suffix"); + CmdArgs.push_back("-spirv_input"); + + StringRef Arch(Args.getLastArgValue(OPT_arch_EQ)); + if (Arch.empty()) + return createStringError(inconvertibleErrorCode(), + "Arch must be specified for AOT compilation"); + CmdArgs.push_back("-device"); + CmdArgs.push_back(Arch); + + StringRef ExtraArgs = Args.getLastArgValue(OPT_ocloc_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + CmdArgs.push_back("-output"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back("-file"); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OclocPath, CmdArgs)) + return Err; + return Error::success(); +} - return OutputFile; +/// Run AOT compilation for Intel CPU/GPU. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Error runAOTCompile(StringRef InputFile, const ArgList &Args) { + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + OffloadArch OffloadArch = StringToOffloadArch(Arch); + if (IsIntelGPUArch(OffloadArch)) + return runAOTCompileIntelGPU(InputFile, Args); + if (IsIntelCPUArch(OffloadArch)) + return runAOTCompileIntelCPU(InputFile, Args); + + return createStringError(inconvertibleErrorCode(), "Unsupported arch"); } /// Performs the following steps: @@ -347,12 +475,18 @@ Error runSYCLLink(ArrayRef Files, const ArgList &Args) { // Link all input bitcode files and SYCL device library files, if any. 
auto LinkedFile = linkDeviceCode(Files, Args, C); if (!LinkedFile) - reportError(LinkedFile.takeError()); + return LinkedFile.takeError(); // SPIR-V code generation step. auto SPVFile = runSPIRVCodeGen(*LinkedFile, Args, C); if (!SPVFile) return SPVFile.takeError(); + + if (IsAOTCompileNeeded) { + if (Error Err = runAOTCompile(*SPVFile, Args)) + return Err; + } + return Error::success(); } @@ -394,9 +528,14 @@ int main(int argc, char **argv) { DryRun = Args.hasArg(OPT_dry_run); SaveTemps = Args.hasArg(OPT_save_temps); - OutputFile = "a.spv"; - if (Args.hasArg(OPT_o)) - OutputFile = Args.getLastArgValue(OPT_o); + IsAOTCompileNeeded = Args.hasArg(OPT_arch_EQ); + + if (!Args.hasArg(OPT_o)) + reportError(createStringError("Output file must be specified")); + OutputFile = Args.getLastArgValue(OPT_o); + + if (!Args.hasArg(OPT_triple_EQ)) + reportError(createStringError("Target triple must be specified")); if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) { Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ); diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 1006784973b87..9478fbb4ae4f4 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -46,3 +46,11 @@ def spirv_dump_device_code_EQ : Joined<["--", "-"], "spirv-dump-device-code=">, def print_linked_module : Flag<["--"], "print-linked-module">, Flags<[LinkerOnlyOption]>, HelpText<"Print the linked module's IR for testing">; + +def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to ocloc for Intel GPU AOT compilation">; + +def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to opencl-aot for CPU AOT compilation">;