Skip to content

Commit 1a38586

Browse files
arsenm and claude
authored and committed
clang/AMDGPU: Split out target ID flags in TranslateArgs. (llvm#203750)
Change how xnack and sramecc are processed. Introduce -mxnack/-mno-xnack and -msramecc/-mno-sramecc flags. When the target ID is first parsed in TranslateArgs, synthesize the appropriate flag for the toolchain. This avoids special-case feature string fixups in getAMDGPUTargetFeatures, and also avoids an extra parse of the target ID. In the future this will also simplify tracking these ABI modifiers in a module flag. As a side effect, you can use these flags to choose the mode when the target ID carries no specifier for that feature; an explicit target ID specifier still takes precedence. These do not fully replace the target ID syntax, as there's no way to represent compiling both modes for the same subtarget. I didn't bother trying to forward these flags from the main command line when they are not specified for the offload device, but I suppose that would be possible. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dfa7621 commit 1a38586

11 files changed

Lines changed: 209 additions & 54 deletions

File tree

clang/include/clang/Options/Options.td

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5981,6 +5981,10 @@ defm sram_ecc_legacy : SimpleMFlag<"sram-ecc", "", "",
59815981
"Legacy option to specify SRAM ECC mode (AMDGPU only)">;
59825982
defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
59835983
" threadgroup split execution mode (AMDGPU only)", m_amdgpu_Features_Group>;
5984+
defm xnack : SimpleMFlag<"xnack", "Enable", "Disable",
5985+
" XNACK (AMDGPU only)", m_amdgpu_Features_Group>;
5986+
defm sramecc : SimpleMFlag<"sramecc", "Enable", "Disable",
5987+
" SRAMECC (AMDGPU only)", m_amdgpu_Features_Group>;
59845988
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
59855989
"Specify wavefront size 64", "Specify wavefront size 32",
59865990
" mode (AMDGPU only)">;
@@ -5993,11 +5997,6 @@ def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">,
59935997
def mno_unsafe_fp_atomics : Flag<["-"], "mno-unsafe-fp-atomics">,
59945998
Visibility<[ClangOption, FlangOption]>, Alias<fno_atomic_ignore_denormal_mode>;
59955999

5996-
// TODO: Remove during upstreaming target id.
5997-
def mxnack : Flag<["-"], "mxnack">, Group<m_amdgpu_Features_Group>,
5998-
HelpText<"Legacy option to specify XNACK mode (AMDGPU only)">;
5999-
def mno_xnack : Flag<["-"], "mno-xnack">, Group<m_amdgpu_Features_Group>;
6000-
60016000
def faltivec : Flag<["-"], "faltivec">, Group<f_Group>;
60026001
def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>;
60036002
let Flags = [TargetSpecific] in {

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -672,32 +672,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
672672
const llvm::Triple &Triple,
673673
const llvm::opt::ArgList &Args,
674674
std::vector<StringRef> &Features,
675-
StringRef TcTargetID) {
676-
// Add target ID features to -target-feature options. No diagnostics should
677-
// be emitted here since invalid target ID is diagnosed at other places.
678-
StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
679-
// Use this toolchain's TargetID if mcpu is not defined
680-
if (TargetID.empty() && !TcTargetID.empty())
681-
TargetID = TcTargetID;
682-
if (!TargetID.empty()) {
683-
llvm::StringMap<bool> FeatureMap;
684-
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
685-
if (OptionalGpuArch) {
686-
StringRef GpuArch = *OptionalGpuArch;
687-
// Iterate through all possible target ID features for the given GPU.
688-
// If it is mapped to true, add +feature.
689-
// If it is mapped to false, add -feature.
690-
// If it is not in the map (default), do not add it
691-
for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
692-
auto Pos = FeatureMap.find(Feature);
693-
if (Pos == FeatureMap.end())
694-
continue;
695-
Features.push_back(Args.MakeArgStringRef(
696-
(Twine(Pos->second ? "+" : "-") + Feature).str()));
697-
}
698-
}
699-
}
700-
675+
StringRef /*TcTargetID*/) {
701676
if (Args.hasFlag(options::OPT_mwavefrontsize64,
702677
options::OPT_mno_wavefrontsize64, false))
703678
Features.push_back("+wavefrontsize64");
@@ -801,9 +776,27 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
801776
if (!BoundArch.empty()) {
802777
DAL->eraseArg(options::OPT_mcpu_EQ);
803778
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
804-
checkTargetID(*DAL);
805-
} else if (DeviceOffloadKind == Action::OFK_None) {
806-
checkTargetID(*DAL);
779+
}
780+
781+
AMDGPUToolChain::ParsedTargetIDType PTID = checkTargetID(*DAL);
782+
783+
// Synthesize feature flags for target ID modifiers (xnack, sramecc).
784+
if (PTID.OptionalFeatureMap) {
785+
const llvm::StringMap<bool> &FeatureMap = *PTID.OptionalFeatureMap;
786+
787+
auto XnackIt = FeatureMap.find("xnack");
788+
if (XnackIt != FeatureMap.end()) {
789+
DAL->AddFlagArg(nullptr,
790+
Opts.getOption(XnackIt->second ? options::OPT_mxnack
791+
: options::OPT_mno_xnack));
792+
}
793+
794+
auto SrameccIt = FeatureMap.find("sramecc");
795+
if (SrameccIt != FeatureMap.end()) {
796+
DAL->AddFlagArg(nullptr, Opts.getOption(SrameccIt->second
797+
? options::OPT_msramecc
798+
: options::OPT_mno_sramecc));
799+
}
807800
}
808801

809802
if (Args.getLastArgValue(options::OPT_x) != "cl")
@@ -982,7 +975,11 @@ AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
982975
AMDGPUToolChain::ParsedTargetIDType
983976
AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
984977
StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
985-
if (TargetID.empty())
978+
// For offload toolchains (HIP, OpenMP, etc.), `getAuxTriple()` is the host;
979+
// `-march=` there refers to the host CPU (e.g. haswell) and must not be
980+
// parsed as an AMDGPU Target ID. Only standalone AMDGPU uses `-march=` as
981+
// a legacy spelling for the GPU `-mcpu=` (see TranslateArgs when OFK_None).
982+
if (TargetID.empty() && !getAuxTriple())
986983
TargetID = DriverArgs.getLastArgValue(options::OPT_march_EQ);
987984

988985
if (TargetID.empty())
@@ -996,13 +993,14 @@ AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
996993
return {TargetID.str(), OptionalGpuArch->str(), FeatureMap};
997994
}
998995

999-
void AMDGPUToolChain::checkTargetID(
1000-
const llvm::opt::ArgList &DriverArgs) const {
996+
AMDGPUToolChain::ParsedTargetIDType
997+
AMDGPUToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
1001998
auto PTID = getParsedTargetID(DriverArgs);
1002999
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
10031000
getDriver().Diag(clang::diag::err_drv_bad_target_id)
10041001
<< *PTID.OptionalTargetID;
10051002
}
1003+
return PTID;
10061004
}
10071005

10081006
Expected<SmallVector<std::string>>

clang/lib/Driver/ToolChains/AMDGPU.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,16 +158,18 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
158158
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
159159

160160
protected:
161-
/// Check and diagnose invalid target ID specified by -mcpu.
162-
virtual void checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
163-
164161
/// The struct type returned by getParsedTargetID.
165162
struct ParsedTargetIDType {
166163
std::optional<std::string> OptionalTargetID;
167164
std::optional<std::string> OptionalGPUArch;
168-
std::optional<llvm::StringMap<bool>> OptionalFeatures;
165+
std::optional<llvm::StringMap<bool>> OptionalFeatureMap;
169166
};
170167

168+
/// Check and diagnose invalid target ID specified by -mcpu.
169+
/// Returns the parsed target ID.
170+
virtual ParsedTargetIDType
171+
checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
172+
171173
/// Get target ID, GPU arch, and target ID features if the target ID is
172174
/// specified and valid.
173175
ParsedTargetIDType

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,13 +424,14 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
424424
return BCLibs;
425425
}
426426

427-
void HIPAMDToolChain::checkTargetID(
428-
const llvm::opt::ArgList &DriverArgs) const {
427+
HIPAMDToolChain::ParsedTargetIDType
428+
HIPAMDToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
429429
auto PTID = getParsedTargetID(DriverArgs);
430430
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch &&
431431
PTID.OptionalTargetID != "amdgcnspirv")
432432
getDriver().Diag(clang::diag::err_drv_bad_target_id)
433433
<< *PTID.OptionalTargetID;
434+
return PTID;
434435
}
435436

436437
SPIRVAMDToolChain::SPIRVAMDToolChain(const Driver &D,

clang/lib/Driver/ToolChains/HIPAMD.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
100100
LTOKind getDefaultLTOMode() const override { return LTOK_Full; }
101101

102102
const ToolChain &HostTC;
103-
void checkTargetID(const llvm::opt::ArgList &DriverArgs) const override;
103+
ParsedTargetIDType
104+
checkTargetID(const llvm::opt::ArgList &DriverArgs) const override;
104105

105106
protected:
106107
Tool *buildLinker() const override;

clang/test/Driver/amdgpu-openmp-toolchain.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363

6464
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
6565
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
66-
// CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a" "-target-feature" "-sramecc" "-target-feature" "+xnack"
66+
// CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a" "-target-feature" "+xnack" "-target-feature" "-sramecc"
6767
// CHECK-TARGET-ID: llvm-offload-binary{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp
6868

6969
// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
2626
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
2727
// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
28-
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
28+
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=+xnack,-sramecc"{{.*}}
2929

3030
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
3131
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
32-
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
32+
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=+xnack,-sramecc"{{.*}}
3333

3434
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3535
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// Test for -mxnack/-mno-xnack and -msramecc/-mno-sramecc flags
2+
3+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -mxnack %s 2>&1 | \
4+
// RUN: FileCheck -check-prefix=XNACK-ON %s
5+
// XNACK-ON: "-target-feature" "+xnack"
6+
7+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -mno-xnack %s 2>&1 | \
8+
// RUN: FileCheck -check-prefix=XNACK-OFF %s
9+
// XNACK-OFF: "-target-feature" "-xnack"
10+
11+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -msramecc %s 2>&1 | \
12+
// RUN: FileCheck -check-prefix=SRAMECC-ON %s
13+
// SRAMECC-ON: "-target-feature" "+sramecc"
14+
15+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -mno-sramecc %s 2>&1 | \
16+
// RUN: FileCheck -check-prefix=SRAMECC-OFF %s
17+
// SRAMECC-OFF: "-target-feature" "-sramecc"
18+
19+
// Test that target ID takes precedence over explicit flags
20+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -mno-xnack %s 2>&1 | \
21+
// RUN: FileCheck -check-prefix=TARGETID-OVERRIDES-XNACK %s
22+
// TARGETID-OVERRIDES-XNACK: "-target-feature" "+xnack"
23+
24+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -mxnack %s 2>&1 | \
25+
// RUN: FileCheck -check-prefix=TARGETID-OVERRIDES-XNACK-OFF %s
26+
// TARGETID-OVERRIDES-XNACK-OFF: "-target-feature" "-xnack"
27+
28+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:sramecc+ -mno-sramecc %s 2>&1 | \
29+
// RUN: FileCheck -check-prefix=TARGETID-OVERRIDES-SRAMECC %s
30+
// TARGETID-OVERRIDES-SRAMECC: "-target-feature" "+sramecc"
31+
32+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:sramecc- -msramecc %s 2>&1 | \
33+
// RUN: FileCheck -check-prefix=TARGETID-OVERRIDES-SRAMECC-OFF %s
34+
// TARGETID-OVERRIDES-SRAMECC-OFF: "-target-feature" "-sramecc"
35+
36+
// Test combining both flags
37+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -mxnack -msramecc %s 2>&1 | \
38+
// RUN: FileCheck -check-prefixes=BOTH-ON %s
39+
// BOTH-ON: "-target-feature" "+xnack"
40+
// BOTH-ON-SAME: "-target-feature" "+sramecc"
41+
42+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -mno-xnack -mno-sramecc %s 2>&1 | \
43+
// RUN: FileCheck -check-prefixes=BOTH-OFF %s
44+
// BOTH-OFF: "-target-feature" "-xnack"
45+
// BOTH-OFF-SAME: "-target-feature" "-sramecc"
46+
47+
// Test that target ID without explicit features doesn't synthesize flags
48+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 | \
49+
// RUN: FileCheck -check-prefix=NO-FEATURES %s
50+
// NO-FEATURES-NOT: "-target-feature" "{{[+-]}}xnack"
51+
// NO-FEATURES-NOT: "-target-feature" "{{[+-]}}sramecc"
52+
53+
// Test target ID features are synthesized
54+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ %s 2>&1 | \
55+
// RUN: FileCheck -check-prefix=TARGETID-XNACK %s
56+
// TARGETID-XNACK: "-target-feature" "+xnack"
57+
58+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:sramecc+ %s 2>&1 | \
59+
// RUN: FileCheck -check-prefix=TARGETID-SRAMECC %s
60+
// TARGETID-SRAMECC: "-target-feature" "+sramecc"
61+
62+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc+ %s 2>&1 | \
63+
// RUN: FileCheck -check-prefixes=TARGETID-BOTH %s
64+
// TARGETID-BOTH: "-target-feature" "+xnack"
65+
// TARGETID-BOTH-SAME: "-target-feature" "+sramecc"
66+
67+
//
68+
// Offload tests
69+
//
70+
71+
// Test offload with target ID features synthesized from --offload-arch
72+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx90a:xnack+:sramecc- \
73+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-TARGETID %s
74+
// OMP-TARGETID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
75+
// OMP-TARGETID-SAME: "-target-feature" "+xnack"
76+
// OMP-TARGETID-SAME: "-target-feature" "-sramecc"
77+
78+
// Test offload using -fopenmp-targets with target ID
79+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
80+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908:xnack-:sramecc+ \
81+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-MARCH %s
82+
// OMP-MARCH: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx908"
83+
// OMP-MARCH-SAME: "-target-feature" "-xnack"
84+
// OMP-MARCH-SAME: "-target-feature" "+sramecc"
85+
86+
// Test offload with explicit device flags using -Xopenmp-target
87+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
88+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a \
89+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mxnack \
90+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mno-sramecc \
91+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-FLAGS %s
92+
// OMP-FLAGS: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
93+
// OMP-FLAGS-SAME: "-target-feature" "+xnack"
94+
// OMP-FLAGS-SAME: "-target-feature" "-sramecc"
95+
96+
// Test offload with target ID taking precedence over explicit flags
97+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
98+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a:xnack- \
99+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mxnack \
100+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-TARGETID-WINS %s
101+
// OMP-TARGETID-WINS: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
102+
// OMP-TARGETID-WINS-SAME: "-target-feature" "-xnack"
103+
104+
// Test offload using base architecture gfx90a with -mxnack flag for xnack+
105+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
106+
// RUN: --offload-arch=gfx90a \
107+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mxnack \
108+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-GFX90A-XNACK-ON %s
109+
// OMP-GFX90A-XNACK-ON: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
110+
// OMP-GFX90A-XNACK-ON-SAME: "-target-feature" "+xnack"
111+
112+
// Test offload using base architecture gfx90a with -mno-xnack flag for xnack-
113+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
114+
// RUN: --offload-arch=gfx90a \
115+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mno-xnack \
116+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-GFX90A-XNACK-OFF %s
117+
// OMP-GFX90A-XNACK-OFF: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
118+
// OMP-GFX90A-XNACK-OFF-SAME: "-target-feature" "-xnack"
119+
120+
// Test offload with multiple device compilations for same base architecture.
121+
// To get both xnack+ and xnack- for gfx90a in the same invocation, you must use
122+
// target ID syntax in --offload-arch.
123+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
124+
// RUN: --offload-arch=gfx90a:xnack+ --offload-arch=gfx90a:xnack- -mxnack \
125+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-MULTI-XNACK %s
126+
// OMP-MULTI-XNACK: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
127+
// OMP-MULTI-XNACK-SAME: "-target-feature" "+xnack"
128+
// OMP-MULTI-XNACK: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
129+
// OMP-MULTI-XNACK-SAME: "-target-feature" "-xnack"
130+
131+
// Test that -Xopenmp-target flags apply to all targets with matching triple.
132+
// When compiling for multiple different base architectures (gfx906, gfx90a),
133+
// -Xopenmp-target=amdgcn-amd-amdhsa applies the flag to all of them.
134+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
135+
// RUN: --offload-arch=gfx906 --offload-arch=gfx90a \
136+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -mxnack \
137+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=OMP-MULTI-ARCH %s
138+
// OMP-MULTI-ARCH: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx906"
139+
// OMP-MULTI-ARCH-SAME: "-target-feature" "+xnack"
140+
// OMP-MULTI-ARCH: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a"
141+
// OMP-MULTI-ARCH-SAME: "-target-feature" "+xnack"
142+
143+
// Test that top-level -mxnack flags (not specified to the device) are ignored.
144+
// TODO: Should this be forwarded?
145+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
146+
// RUN: --offload-arch=gfx90a -mxnack -mno-sramecc \
147+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=GENERIC-ARG %s
148+
// GENERIC-ARG: warning: argument unused during compilation: '-mxnack'
149+
// GENERIC-ARG: warning: argument unused during compilation: '-mno-sramecc'

clang/test/Driver/hip-target-id.hip

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,24 @@
2323

2424
// CHECK: [[CLANG:"[^"]*clang[^"]*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
2525
// CHECK-SAME: "-target-cpu" "gfx908"
26-
// CHECK-SAME: "-target-feature" "+sramecc"
2726
// CHECK-SAME: "-target-feature" "+xnack"
27+
// CHECK-SAME: "-target-feature" "+sramecc"
28+
29+
// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-triple" "amdgcn-amd-amdhsa"
30+
// TMP-SAME: "-target-cpu" "gfx908"
31+
// TMP-SAME: "-target-feature" "+xnack"
32+
// TMP-SAME: "-target-feature" "+sramecc"
2833

2934
// CHECK: [[LLD:"[^"]*lld[^"]*"]] {{.*}} "-plugin-opt=mcpu=gfx908"
30-
// CHECK-SAME: "-plugin-opt=-mattr=+sramecc,+xnack"
35+
// CHECK-SAME: "-plugin-opt=-mattr=+xnack,+sramecc"
3136

3237
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
3338
// CHECK-SAME: "-target-cpu" "gfx908"
34-
// CHECK-SAME: "-target-feature" "-sramecc"
3539
// CHECK-SAME: "-target-feature" "+xnack"
40+
// CHECK-SAME: "-target-feature" "-sramecc"
3641

3742
// CHECK: [[LLD]] {{.*}} "-plugin-opt=mcpu=gfx908"
38-
// CHECK-SAME: "-plugin-opt=-mattr=-sramecc,+xnack"
43+
// CHECK-SAME: "-plugin-opt=-mattr=+xnack,-sramecc"
3944

4045
// CHECK: {{"[^"]*clang-offload-bundler[^"]*"}}
4146
// CHECK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc-:xnack+"

clang/test/Driver/hip-toolchain-features.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
// RUN: -nogpuinc --offload-arch=gfx908:xnack-:sramecc- --no-offload-new-driver %s \
4242
// RUN: 2>&1 | FileCheck %s -check-prefix=NOALL3
4343

44-
// ALL3: {{.*}}clang{{.*}}"-target-feature" "+sramecc" "-target-feature" "+xnack"
45-
// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-sramecc" "-target-feature" "-xnack"
44+
// ALL3: {{.*}}clang{{.*}}"-target-feature" "+xnack" "-target-feature" "+sramecc"
45+
// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-xnack" "-target-feature" "-sramecc"
4646

4747
// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
4848
// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \

0 commit comments

Comments
 (0)