Skip to content

Commit 597fda6

Browse files
committed
[SYCL] Add clang-linker-wrapper changes to call clang-sycl-linker for SYCL offloads
Device code linking happens inside clang-linker-wrapper. In the current implementation, clang-linker-wrapper does the following:
1. Extracts the device code: Input_1, Input_2, ...
2. Groups the device code according to target device: Inputs[triple_1] = ..., Inputs[triple_2] = ...
3. For each group, i.e. Inputs[triple_i]:
   a. Gathers all the offload kinds found inside those inputs in ActiveOffloadKinds.
   b. Links all images inside Inputs[triple_i] by calling 'clang --target=triple_i ...'.
   c. Creates a copy of that linked image for each offload kind and adds it to the Output[Kind] list.
In the SYCL compilation flow, there is a deviation in Step 3b: device code splitting is performed inside the 'clang --target=triple_i ...' call, so the output is a 'packaged' file containing multiple device images. This deviation requires us to capture the OffloadKind during the linking stage and pass it along to the linking function (clang), so that clang can be called with a unique option, '--sycl-link', which makes it invoke 'clang-sycl-linker' under the hood (clang-sycl-linker performs the SYCL-specific linking). Our current objective is to implement an end-to-end SYCL offloading flow and get it working. We will eventually merge our approach with the community flow. Signed-off-by: Arvind Sudarsanam <[email protected]>
1 parent 6cfec29 commit 597fda6

File tree

5 files changed

+134
-23
lines changed

5 files changed

+134
-23
lines changed

Diff for: clang/docs/ClangOffloadPackager.rst

+2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ the following values for the :ref:`offload kind<table-offload_kind>` and the
112112
+------------+-------+---------------------------------------+
113113
| OFK_HIP | 0x03 | The producer was HIP |
114114
+------------+-------+---------------------------------------+
115+
| OFK_SYCL | 0x04 | The producer was SYCL |
116+
+------------+-------+---------------------------------------+
115117

116118
The flags are used to signify certain conditions, such as the presence of
117119
debugging information or whether or not LTO was used. The string entry table is

Diff for: clang/test/Driver/linker-wrapper.c

+10
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
// REQUIRES: x86-registered-target
33
// REQUIRES: nvptx-registered-target
44
// REQUIRES: amdgpu-registered-target
5+
// REQUIRES: spirv-registered-target
56

67
// An externally visible variable so static libraries extract.
78
__attribute__((visibility("protected"), used)) int x;
89

910
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
1011
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc
1112
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
13+
// RUN: %clang -cc1 %s -triple spirv64-unknown-unknown -emit-llvm-bc -o %t.spirv.bc
1214

1315
// RUN: clang-offload-packager -o %t.out \
1416
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -49,6 +51,14 @@ __attribute__((visibility("protected"), used)) int x;
4951

5052
// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps
5153

54+
// RUN: clang-offload-packager -o %t.out \
55+
// RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
56+
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
57+
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
58+
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=SPIRV-LINK
59+
60+
// SPIRV-LINK: clang{{.*}} -o {{.*}}.img --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=
61+
5262
// RUN: clang-offload-packager -o %t.out \
5363
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
5464
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu

Diff for: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

+54-4
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
464464
} // namespace amdgcn
465465

466466
namespace generic {
467-
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
467+
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
468+
bool HasSYCLOffloadKind = false) {
468469
llvm::TimeTraceScope TimeScope("Clang");
469470
// Use `clang` to invoke the appropriate device tools.
470471
Expected<std::string> ClangPath =
@@ -554,6 +555,17 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
554555
if (Args.hasArg(OPT_embed_bitcode))
555556
CmdArgs.push_back("-Wl,--lto-emit-llvm");
556557

558+
// For linking device code with the SYCL offload kind, special handling is
559+
// required. Passing --sycl-link to clang results in a call to
560+
// clang-sycl-linker. Additional linker flags required by clang-sycl-linker
561+
// will be communicated via the -Xlinker option.
562+
if (HasSYCLOffloadKind) {
563+
CmdArgs.push_back("--sycl-link");
564+
CmdArgs.append(
565+
{"-Xlinker", Args.MakeArgString("-triple=" + Triple.getTriple())});
566+
CmdArgs.append({"-Xlinker", Args.MakeArgString("-arch=" + Arch)});
567+
}
568+
557569
for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
558570
CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)});
559571
for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
@@ -567,7 +579,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
567579
} // namespace generic
568580

569581
Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
570-
const ArgList &Args) {
582+
const ArgList &Args,
583+
bool HasSYCLOffloadKind = false) {
571584
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
572585
switch (Triple.getArch()) {
573586
case Triple::nvptx:
@@ -582,7 +595,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
582595
case Triple::spirv64:
583596
case Triple::systemz:
584597
case Triple::loongarch64:
585-
return generic::clang(InputFiles, Args);
598+
return generic::clang(InputFiles, Args, HasSYCLOffloadKind);
586599
default:
587600
return createStringError(Triple.getArchName() +
588601
" linking is not supported");
@@ -936,6 +949,38 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
936949
InputFiles.emplace_back(*FileNameOrErr);
937950
}
938951

952+
if (ActiveOffloadKindMask & OFK_SYCL) {
953+
// Link the remaining device files using the device linker.
954+
auto OutputOrErr = linkDevice(InputFiles, LinkerArgs, HasSYCLOffloadKind);
955+
if (!OutputOrErr)
956+
return OutputOrErr.takeError();
957+
// Output is a packaged object of device images. Unpackage the images and
958+
// copy them to Images[Kind]
959+
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
960+
MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
961+
if (std::error_code EC = BufferOrErr.getError())
962+
return createFileError(*OutputOrErr, EC);
963+
964+
MemoryBufferRef Buffer = **BufferOrErr;
965+
SmallVector<OffloadFile> Binaries;
966+
if (Error Err = extractOffloadBinaries(Buffer, Binaries))
967+
return std::move(Err);
968+
for (auto &OffloadFile : Binaries) {
969+
auto TheBinary = OffloadFile.getBinary();
970+
OffloadingImage TheImage{};
971+
TheImage.TheImageKind = TheBinary->getImageKind();
972+
TheImage.TheOffloadKind = TheBinary->getOffloadKind();
973+
TheImage.StringData["triple"] = TheBinary->getTriple();
974+
TheImage.StringData["arch"] = TheBinary->getArch();
975+
TheImage.Image = MemoryBuffer::getMemBufferCopy(TheBinary->getImage());
976+
Images[OFK_SYCL].emplace_back(std::move(TheImage));
977+
}
978+
}
979+
980+
// Exit early if no other offload kind found (other than OFK_SYCL).
981+
if ((ActiveOffloadKindMask ^ OFK_SYCL) == 0) {
982+
return Error::success();
983+
939984
// Link the remaining device files using the device linker.
940985
auto OutputOrErr = linkDevice(InputFiles, LinkerArgs);
941986
if (!OutputOrErr)
@@ -944,7 +989,7 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
944989
// Store the offloading image for each linked output file.
945990
for (OffloadKind Kind = OFK_OpenMP; Kind != OFK_LAST;
946991
Kind = static_cast<OffloadKind>((uint16_t)(Kind) << 1)) {
947-
if ((ActiveOffloadKindMask & Kind) == 0)
992+
if (((ActiveOffloadKindMask & Kind) == 0) || (Kind == OFK_SYCL))
948993
continue;
949994
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
950995
llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
@@ -988,6 +1033,11 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
9881033
A.StringData["arch"] > B.StringData["arch"] ||
9891034
A.TheOffloadKind < B.TheOffloadKind;
9901035
});
1036+
if (Kind == OFK_SYCL) {
1037+
// TODO: Update once SYCL offload wrapping logic is available.
1038+
reportError(
1039+
createStringError("SYCL offload wrapping logic is not available"));
1040+
}
9911041
auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind);
9921042
if (!BundledImagesOrErr)
9931043
return BundledImagesOrErr.takeError();

Diff for: clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp

+65-19
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ static StringRef OutputFile;
7070
/// Directory to dump SPIR-V IR if requested by user.
7171
static SmallString<128> SPIRVDumpDir;
7272

73+
using OffloadingImage = OffloadBinary::OffloadingImage;
74+
7375
static void printVersion(raw_ostream &OS) {
7476
OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n';
7577
}
@@ -168,10 +170,10 @@ Expected<SmallVector<std::string>> getInput(const ArgList &Args) {
168170
/// are LLVM IR bitcode files.
169171
// TODO: Support SPIR-V IR files.
170172
Expected<std::unique_ptr<Module>> getBitcodeModule(StringRef File,
171-
LLVMContext &C) {
173+
LLVMContext &Ctx) {
172174
SMDiagnostic Err;
173175

174-
auto M = getLazyIRFileModule(File, Err, C);
176+
auto M = getLazyIRFileModule(File, Err, Ctx);
175177
if (M)
176178
return std::move(M);
177179
return createStringError(Err.getMessage());
@@ -211,16 +213,16 @@ Expected<SmallVector<std::string>> getSYCLDeviceLibs(const ArgList &Args) {
211213
/// 3. Link all the images gathered in Step 2 with the output of Step 1 using
212214
/// linkInModule API. LinkOnlyNeeded flag is used.
213215
Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
214-
const ArgList &Args, LLVMContext &C) {
216+
const ArgList &Args, LLVMContext &Ctx) {
215217
llvm::TimeTraceScope TimeScope("SYCL link device code");
216218

217219
assert(InputFiles.size() && "No inputs to link");
218220

219-
auto LinkerOutput = std::make_unique<Module>("sycl-device-link", C);
221+
auto LinkerOutput = std::make_unique<Module>("sycl-device-link", Ctx);
220222
Linker L(*LinkerOutput);
221223
// Link SYCL device input files.
222224
for (auto &File : InputFiles) {
223-
auto ModOrErr = getBitcodeModule(File, C);
225+
auto ModOrErr = getBitcodeModule(File, Ctx);
224226
if (!ModOrErr)
225227
return ModOrErr.takeError();
226228
if (L.linkInModule(std::move(*ModOrErr)))
@@ -235,7 +237,7 @@ Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
235237
// Link in SYCL device library files.
236238
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
237239
for (auto &File : *SYCLDeviceLibFiles) {
238-
auto LibMod = getBitcodeModule(File, C);
240+
auto LibMod = getBitcodeModule(File, Ctx);
239241
if (!LibMod)
240242
return LibMod.takeError();
241243
if ((*LibMod)->getTargetTriple() == Triple) {
@@ -278,18 +280,18 @@ Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
278280
/// Converts 'File' from LLVM bitcode to SPIR-V format using SPIR-V backend.
279281
/// 'Args' encompasses all arguments required for linking device code and will
280282
/// be parsed to generate options required to be passed into the backend.
281-
static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
282-
LLVMContext &C) {
283+
static Error runSPIRVCodeGen(StringRef File, const ArgList &Args,
284+
StringRef OutputFile, LLVMContext &Ctx) {
283285
llvm::TimeTraceScope TimeScope("SPIR-V code generation");
284286

285287
// Parse input module.
286-
SMDiagnostic Err;
287-
std::unique_ptr<Module> M = parseIRFile(File, Err, C);
288+
SMDiagnostic E;
289+
std::unique_ptr<Module> M = parseIRFile(File, E, Ctx);
288290
if (!M)
289-
return createStringError(Err.getMessage());
291+
return createStringError(E.getMessage());
290292

291293
if (Error Err = M->materializeAll())
292-
return std::move(Err);
294+
return Err;
293295

294296
Triple TargetTriple(Args.getLastArgValue(OPT_triple_EQ));
295297
M->setTargetTriple(TargetTriple);
@@ -333,7 +335,7 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
333335
errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File,
334336
OutputFile);
335337

336-
return OutputFile;
338+
return Error::success();
337339
}
338340

339341
/// Performs the following steps:
@@ -342,17 +344,61 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
342344
Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
343345
llvm::TimeTraceScope TimeScope("SYCL device linking");
344346

345-
LLVMContext C;
347+
LLVMContext Ctx;
346348

347349
// Link all input bitcode files and SYCL device library files, if any.
348-
auto LinkedFile = linkDeviceCode(Files, Args, C);
350+
auto LinkedFile = linkDeviceCode(Files, Args, Ctx);
349351
if (!LinkedFile)
350352
reportError(LinkedFile.takeError());
351353

354+
// TODO: SYCL post link functionality involves device code splitting and will
355+
// result in multiple bitcode files.
356+
// The following lines are placeholders to represent multiple files and will
357+
// be refactored once SYCL post link support is available.
358+
SmallVector<std::string> SplitModules;
359+
SplitModules.emplace_back(*LinkedFile);
360+
352361
// SPIR-V code generation step.
353-
auto SPVFile = runSPIRVCodeGen(*LinkedFile, Args, C);
354-
if (!SPVFile)
355-
return SPVFile.takeError();
362+
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
363+
auto Stem = OutputFile.rsplit('.').first;
364+
std::string SPVFile(Stem);
365+
SPVFile.append("_" + utostr(I) + ".spv");
366+
auto Err = runSPIRVCodeGen(SplitModules[I], Args, SPVFile, Ctx);
367+
if (Err)
368+
return std::move(Err);
369+
SplitModules[I] = SPVFile;
370+
}
371+
372+
// Write the final output into file.
373+
int FD = -1;
374+
if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
375+
return errorCodeToError(EC);
376+
llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true);
377+
378+
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
379+
auto File = SplitModules[I];
380+
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
381+
llvm::MemoryBuffer::getFileOrSTDIN(File);
382+
if (std::error_code EC = FileOrErr.getError()) {
383+
if (DryRun)
384+
FileOrErr = MemoryBuffer::getMemBuffer("");
385+
else
386+
return createFileError(File, EC);
387+
}
388+
OffloadingImage TheImage{};
389+
TheImage.TheImageKind = IMG_Object;
390+
TheImage.TheOffloadKind = OFK_SYCL;
391+
TheImage.StringData["triple"] =
392+
Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ));
393+
TheImage.StringData["arch"] =
394+
Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ));
395+
TheImage.Image = std::move(*FileOrErr);
396+
397+
llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage);
398+
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
399+
return createStringError("Offload binary has invalid size alignment");
400+
FS << Buffer;
401+
}
356402
return Error::success();
357403
}
358404

@@ -394,7 +440,7 @@ int main(int argc, char **argv) {
394440
DryRun = Args.hasArg(OPT_dry_run);
395441
SaveTemps = Args.hasArg(OPT_save_temps);
396442

397-
OutputFile = "a.spv";
443+
OutputFile = "a.out";
398444
if (Args.hasArg(OPT_o))
399445
OutputFile = Args.getLastArgValue(OPT_o);
400446

Diff for: llvm/lib/Object/OffloadBinary.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ OffloadKind object::getOffloadKind(StringRef Name) {
301301
.Case("openmp", OFK_OpenMP)
302302
.Case("cuda", OFK_Cuda)
303303
.Case("hip", OFK_HIP)
304+
.Case("sycl", OFK_SYCL)
304305
.Default(OFK_None);
305306
}
306307

@@ -312,6 +313,8 @@ StringRef object::getOffloadKindName(OffloadKind Kind) {
312313
return "cuda";
313314
case OFK_HIP:
314315
return "hip";
316+
case OFK_SYCL:
317+
return "sycl";
315318
default:
316319
return "none";
317320
}

0 commit comments

Comments
 (0)