Skip to content

Commit d89d7a3

Browse files
committed
[CIR][CUDA] Initial support for device compilation
1 parent 90a5b61 commit d89d7a3

File tree

4 files changed

+46
-17
lines changed

4 files changed

+46
-17
lines changed

clang/lib/CIR/CodeGen/CIRGenCall.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1639,9 +1639,9 @@ static void getTrivialDefaultFunctionAttributes(
16391639
// TODO: NoThrow attribute should be added for other GPU modes CUDA, SYCL,
16401640
// HIP, OpenMP offload.
16411641
// AFAIK, neither of them support exceptions in device code.
1642-
if ((langOpts.CUDA && langOpts.CUDAIsDevice) || langOpts.SYCLIsDevice)
1642+
if (langOpts.SYCLIsDevice)
16431643
llvm_unreachable("NYI");
1644-
if (langOpts.OpenCL) {
1644+
if (langOpts.OpenCL || (langOpts.CUDA && langOpts.CUDAIsDevice)) {
16451645
auto noThrow = cir::NoThrowAttr::get(CGM.getBuilder().getContext());
16461646
funcAttrs.set(noThrow.getMnemonic(), noThrow);
16471647
}

clang/lib/CIR/CodeGen/CIRGenModule.cpp

+25-14
Original file line numberDiff line numberDiff line change
@@ -516,16 +516,32 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) {
516516
assert(!Global->hasAttr<CPUDispatchAttr>() && "NYI");
517517

518518
if (langOpts.CUDA) {
519-
if (langOpts.CUDAIsDevice)
520-
llvm_unreachable("NYI");
519+
if (langOpts.CUDAIsDevice) {
520+
// This will implicitly mark templates and their
521+
// specializations as __host__ __device__.
522+
if (langOpts.OffloadImplicitHostDeviceTemplates)
523+
llvm_unreachable("NYI");
521524

522-
if (dyn_cast<VarDecl>(Global))
523-
llvm_unreachable("NYI");
525+
// This maps some parallel standard libraries implicitly
526+
// to GPU, even when they are not marked __device__.
527+
if (langOpts.HIPStdPar)
528+
llvm_unreachable("NYI");
524529

525-
// We must skip __device__ functions when compiling for host.
526-
if (!Global->hasAttr<CUDAHostAttr>() && Global->hasAttr<CUDADeviceAttr>()) {
527-
return;
530+
if (Global->hasAttr<CUDAGlobalAttr>())
531+
llvm_unreachable("NYI");
532+
533+
if (!Global->hasAttr<CUDADeviceAttr>())
534+
return;
535+
} else {
536+
// We must skip __device__ functions when compiling for host.
537+
if (!Global->hasAttr<CUDAHostAttr>() &&
538+
Global->hasAttr<CUDADeviceAttr>()) {
539+
return;
540+
}
528541
}
542+
543+
if (dyn_cast<VarDecl>(Global))
544+
llvm_unreachable("NYI");
529545
}
530546

531547
if (langOpts.OpenMP) {
@@ -2415,8 +2431,6 @@ StringRef CIRGenModule::getMangledName(GlobalDecl GD) {
24152431
}
24162432
}
24172433

2418-
assert(!langOpts.CUDAIsDevice && "NYI");
2419-
24202434
// Keep the first result in the case of a mangling collision.
24212435
const auto *ND = cast<NamedDecl>(GD.getDecl());
24222436
std::string MangledName = getMangledNameImpl(*this, GD, ND);
@@ -3099,7 +3113,8 @@ void CIRGenModule::emitDeferred(unsigned recursionLimit) {
30993113
// Emit CUDA/HIP static device variables referenced by host code only. Note we
31003114
// should not clear CUDADeviceVarODRUsedByHost since it is still needed for
31013115
// further handling.
3102-
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
3116+
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
3117+
!getASTContext().CUDADeviceVarODRUsedByHost.empty()) {
31033118
llvm_unreachable("NYI");
31043119
}
31053120

@@ -3392,10 +3407,6 @@ void CIRGenModule::Release() {
33923407
llvm_unreachable("NYI");
33933408
}
33943409

3395-
if (langOpts.CUDAIsDevice && getTriple().isNVPTX()) {
3396-
llvm_unreachable("NYI");
3397-
}
3398-
33993410
if (langOpts.EHAsynch)
34003411
llvm_unreachable("NYI");
34013412

clang/lib/CIR/CodeGen/CIRGenTypes.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,11 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
348348

349349
// For the device-side compilation, CUDA device builtin surface/texture types
350350
// may be represented in different types.
351-
assert(!astContext.getLangOpts().CUDAIsDevice && "not implemented");
351+
if (astContext.getLangOpts().CUDAIsDevice) {
352+
if (Ty->isCUDADeviceBuiltinSurfaceType() ||
353+
Ty->isCUDADeviceBuiltinTextureType())
354+
llvm_unreachable("NYI");
355+
}
352356

353357
if (const auto *recordType = dyn_cast<RecordType>(T))
354358
return convertRecordDeclType(recordType->getDecl());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include "../Inputs/cuda.h"
2+
3+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
4+
// RUN: -fclangir -emit-cir -o - %s | FileCheck %s
5+
6+
// This shouldn't emit.
7+
__host__ void host_fn(int *a, int *b, int *c) {}
8+
9+
// CHECK-NOT: cir.func @_Z7host_fnPiS_S_
10+
11+
// This should emit as a normal C++ function.
12+
__device__ void device_fn(int* a, double b, float c) {}
13+
14+
// CIR: cir.func @_Z9device_fnPidf

0 commit comments

Comments
 (0)