diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b706fa3759c0d..cc671796803fd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6753,6 +6753,8 @@ class MappableExprsHandler { Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_self)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_SELF; if (IsNonContiguous) Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; return Bits; @@ -9820,7 +9822,8 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { - if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory || + Clause->getClauseKind() == OMPC_self_maps) { HasRequiresUnifiedSharedMemory = true; OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); } else if (const auto *AC = diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index feb2448297542..a0ac7b0bc2980 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2245,7 +2245,8 @@ static OffloadArch getOffloadArch(CodeGenModule &CGM) { /// a restriction for OpenMP requires clause "unified_shared_memory". void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { - if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory || + Clause->getClauseKind() == OMPC_self_maps) { OffloadArch Arch = getOffloadArch(CGM); switch (Arch) { case OffloadArch::SM_20: diff --git a/clang/test/OpenMP/requires_codegen.cpp b/clang/test/OpenMP/requires_codegen.cpp index 5a641d0be4deb..b484f6266c045 100644 --- a/clang/test/OpenMP/requires_codegen.cpp +++ b/clang/test/OpenMP/requires_codegen.cpp @@ -15,6 +15,22 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_SELF_NO_ERR + #if defined(REGION_HOST) || defined(REGION_DEVICE_NO_ERR) // expected-no-diagnostics #pragma omp requires unified_shared_memory @@ -23,3 +39,14 @@ #ifdef REGION_DEVICE #pragma omp requires unified_shared_memory // expected-error-re {{Target architecture sm_{{20|21|30|32|35|37|50|52|53|60|61|62}} does not support unified addressing}} #endif + +#ifdef OMP60 +#if defined(REGION_HOST_SELF) || defined(REGION_DEVICE_SELF_NO_ERR) +// expected-no-diagnostics +#pragma omp requires self_maps +#endif + +#ifdef REGION_DEVICE_SELF +#pragma omp requires self_maps // expected-error-re {{Target architecture sm_{{20|21|30|32|35|37|50|52|53}} does not support unified addressing}} +#endif +#endif diff --git a/clang/test/OpenMP/target_data_codegen.cpp b/clang/test/OpenMP/target_data_codegen.cpp index 926aa593f2ba1..adf8f3f40dd2d 100644 --- a/clang/test/OpenMP/target_data_codegen.cpp +++ b/clang/test/OpenMP/target_data_codegen.cpp @@ -690,4 +690,29 @@ void test_present_modifier(int arg) { {++arg;} } #endif +///==========================================================================/// + +// RUN: %clang_cc1 -DCK10 -verify -Wno-vla -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 --check-prefix CK10-64 +// RUN: %clang_cc1 -DCK10 -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CK10 --check-prefix CK10-64 +// RUN: %clang_cc1 -DCK10 -verify -Wno-vla -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 --check-prefix CK10-32 +// RUN: %clang_cc1 -DCK10 -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CK10 --check-prefix CK10-32 + +// RUN: %clang_cc1 -DCK10 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -DCK10 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -DCK10 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// SIMD-ONLY2-NOT: {{__kmpc|__tgt}} + +#ifdef CK10 +void test_self_modifier(int arg) { +// SELF=0x4000 | FROM=0x2 | TO=0x1 = 0x4003 +// CK10: private unnamed_addr constant [1 x i64] [i64 [[#0x4003]]] +#pragma omp target data map(self, tofrom \ + : arg) + {++arg;} +} +#endif #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 338b56226f204..74abacb404de5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -236,6 +236,8 @@ enum class OpenMPOffloadMappingFlags : uint64_t { // dynamic. // This is an OpenMP extension for the sake of OpenACC support. OMP_MAP_OMPX_HOLD = 0x2000, + /// Self directs mapping without creating a separate device copy. + OMP_MAP_SELF = 0x4000, /// Signal that the runtime library should use args as an array of /// descriptor_dim pointers and use args_size as dims. Used when we have /// non-contiguous list items in target update directive