Skip to content

Commit 226d96d

Browse files
committed
[SYCL] Add clang-linker-wrapper changes to call clang-sycl-linker for SYCL offloads
Device code linking happens inside clang-linker-wrapper. In the current implementation, clang-linker-wrapper does the following: 1. Extracts device code: Input_1, Input_2, ... 2. Groups device code according to target devices: Inputs[triple_1] = ..., Inputs[triple_2] = ... 3. For each group, i.e. Inputs[triple_i]: a. Gathers all the offload kinds found inside those inputs in ActiveOffloadKinds. b. Links all images inside Inputs[triple_i] by calling 'clang --target=triple_i ...'. c. Creates a copy of that linked image for each offload kind and adds it to the Output[Kind] list. In the SYCL compilation flow, there is a deviation in Step 3b: device code splitting is performed inside the 'clang --target=triple_i ...' call, and the output is now a 'packaged' file containing multiple device images. This deviation requires us to capture the OffloadKind during the linking stage and pass it along to the linking function (clang), so that clang can be called with a dedicated option, '--sycl-link', that makes it invoke 'clang-sycl-linker' under the hood (clang-sycl-linker does the SYCL-specific linking). Our current objective is to implement an end-to-end SYCL offloading flow and get it working. We will eventually merge our approach with the community flow. Signed-off-by: Arvind Sudarsanam <[email protected]>
1 parent 5216633 commit 226d96d

File tree

6 files changed

+148
-23
lines changed

6 files changed

+148
-23
lines changed

Diff for: clang/docs/ClangOffloadPackager.rst

+2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ the following values for the :ref:`offload kind<table-offload_kind>` and the
112112
+------------+-------+---------------------------------------+
113113
| OFK_HIP | 0x03 | The producer was HIP |
114114
+------------+-------+---------------------------------------+
115+
| OFK_SYCL | 0x04 | The producer was SYCL |
116+
+------------+-------+---------------------------------------+
115117

116118
The flags are used to signify certain conditions, such as the presence of
117119
debugging information or whether or not LTO was used. The string entry table is

Diff for: clang/test/Driver/linker-wrapper.c

+10
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
// REQUIRES: x86-registered-target
33
// REQUIRES: nvptx-registered-target
44
// REQUIRES: amdgpu-registered-target
5+
// REQUIRES: spirv-registered-target
56

67
// An externally visible variable so static libraries extract.
78
__attribute__((visibility("protected"), used)) int x;
89

910
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
1011
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc
1112
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
13+
// RUN: %clang -cc1 %s -triple spirv64-unknown-unknown -emit-llvm-bc -o %t.spirv.bc
1214

1315
// RUN: clang-offload-packager -o %t.out \
1416
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -49,6 +51,14 @@ __attribute__((visibility("protected"), used)) int x;
4951

5052
// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps
5153

54+
// RUN: clang-offload-packager -o %t.out \
55+
// RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
56+
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
57+
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
58+
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=SPIRV-LINK
59+
60+
// SPIRV-LINK: clang{{.*}} -o {{.*}}.img --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=
61+
5262
// RUN: clang-offload-packager -o %t.out \
5363
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
5464
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu

Diff for: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

+67-4
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
464464
} // namespace amdgcn
465465

466466
namespace generic {
467-
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
467+
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
468+
bool HasSYCLOffloadKind = false) {
468469
llvm::TimeTraceScope TimeScope("Clang");
469470
// Use `clang` to invoke the appropriate device tools.
470471
Expected<std::string> ClangPath =
@@ -554,6 +555,17 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
554555
if (Args.hasArg(OPT_embed_bitcode))
555556
CmdArgs.push_back("-Wl,--lto-emit-llvm");
556557

558+
// For linking device code with the SYCL offload kind, special handling is
559+
// required. Passing --sycl-link to clang results in a call to
560+
// clang-sycl-linker. Additional linker flags required by clang-sycl-linker
561+
// will be communicated via the -Xlinker option.
562+
if (HasSYCLOffloadKind) {
563+
CmdArgs.push_back("--sycl-link");
564+
CmdArgs.append(
565+
{"-Xlinker", Args.MakeArgString("-triple=" + Triple.getTriple())});
566+
CmdArgs.append({"-Xlinker", Args.MakeArgString("-arch=" + Arch)});
567+
}
568+
557569
for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
558570
CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)});
559571
for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
@@ -567,7 +579,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
567579
} // namespace generic
568580

569581
Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
570-
const ArgList &Args) {
582+
const ArgList &Args,
583+
bool HasSYCLOffloadKind = false) {
571584
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
572585
switch (Triple.getArch()) {
573586
case Triple::nvptx:
@@ -582,7 +595,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
582595
case Triple::spirv64:
583596
case Triple::systemz:
584597
case Triple::loongarch64:
585-
return generic::clang(InputFiles, Args);
598+
return generic::clang(InputFiles, Args, HasSYCLOffloadKind);
586599
default:
587600
return createStringError(Triple.getArchName() +
588601
" linking is not supported");
@@ -924,9 +937,20 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
924937
auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
925938

926939
DenseSet<OffloadKind> ActiveOffloadKinds;
927-
for (const auto &File : Input)
940+
// Currently, SYCL device code linking process differs from generic device
941+
// code linking.
942+
// TODO: Remove check for offload kind, once SYCL device code linking is
943+
// aligned with generic linking.
944+
bool HasSYCLOffloadKind = false;
945+
bool HasNonSYCLOffloadKind = false;
946+
for (const auto &File : Input) {
928947
if (File.getBinary()->getOffloadKind() != OFK_None)
929948
ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
949+
if (File.getBinary()->getOffloadKind() == OFK_SYCL)
950+
HasSYCLOffloadKind = true;
951+
else
952+
HasNonSYCLOffloadKind = true;
953+
}
930954

931955
// Write any remaining device inputs to an output file.
932956
SmallVector<StringRef> InputFiles;
@@ -937,13 +961,47 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
937961
InputFiles.emplace_back(*FileNameOrErr);
938962
}
939963

964+
if (HasSYCLOffloadKind) {
965+
// Link the remaining device files using the device linker.
966+
auto OutputOrErr = linkDevice(InputFiles, LinkerArgs, HasSYCLOffloadKind);
967+
if (!OutputOrErr)
968+
return OutputOrErr.takeError();
969+
// Output is a packaged object of device images. Unpackage the images and
970+
// copy them to Images[Kind]
971+
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
972+
MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
973+
if (std::error_code EC = BufferOrErr.getError())
974+
return createFileError(*OutputOrErr, EC);
975+
976+
MemoryBufferRef Buffer = **BufferOrErr;
977+
SmallVector<OffloadFile> Binaries;
978+
if (Error Err = extractOffloadBinaries(Buffer, Binaries))
979+
return std::move(Err);
980+
for (auto &OffloadFile : Binaries) {
981+
auto TheBinary = OffloadFile.getBinary();
982+
OffloadingImage TheImage{};
983+
TheImage.TheImageKind = TheBinary->getImageKind();
984+
TheImage.TheOffloadKind = TheBinary->getOffloadKind();
985+
TheImage.StringData["triple"] = TheBinary->getTriple();
986+
TheImage.StringData["arch"] = TheBinary->getArch();
987+
TheImage.Image = MemoryBuffer::getMemBufferCopy(TheBinary->getImage());
988+
Images[OFK_SYCL].emplace_back(std::move(TheImage));
989+
}
990+
}
991+
992+
if (!HasNonSYCLOffloadKind)
993+
return Error::success();
994+
940995
// Link the remaining device files using the device linker.
941996
auto OutputOrErr = linkDevice(InputFiles, LinkerArgs);
942997
if (!OutputOrErr)
943998
return OutputOrErr.takeError();
944999

9451000
// Store the offloading image for each linked output file.
9461001
for (OffloadKind Kind : ActiveOffloadKinds) {
1002+
// For SYCL, Offloading images were created inside clang-sycl-linker
1003+
if (Kind == OFK_SYCL)
1004+
continue;
9471005
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
9481006
llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
9491007
if (std::error_code EC = FileOrErr.getError()) {
@@ -986,6 +1044,11 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
9861044
A.StringData["arch"] > B.StringData["arch"] ||
9871045
A.TheOffloadKind < B.TheOffloadKind;
9881046
});
1047+
if (Kind == OFK_SYCL) {
1048+
// TODO: Update once SYCL offload wrapping logic is available.
1049+
reportError(
1050+
createStringError("SYCL offload wrapping logic is not available"));
1051+
}
9891052
auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind);
9901053
if (!BundledImagesOrErr)
9911054
return BundledImagesOrErr.takeError();

Diff for: clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp

+65-19
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ static StringRef OutputFile;
7070
/// Directory to dump SPIR-V IR if requested by user.
7171
static SmallString<128> SPIRVDumpDir;
7272

73+
using OffloadingImage = OffloadBinary::OffloadingImage;
74+
7375
static void printVersion(raw_ostream &OS) {
7476
OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n';
7577
}
@@ -168,10 +170,10 @@ Expected<SmallVector<std::string>> getInput(const ArgList &Args) {
168170
/// are LLVM IR bitcode files.
169171
// TODO: Support SPIR-V IR files.
170172
Expected<std::unique_ptr<Module>> getBitcodeModule(StringRef File,
171-
LLVMContext &C) {
173+
LLVMContext &Ctx) {
172174
SMDiagnostic Err;
173175

174-
auto M = getLazyIRFileModule(File, Err, C);
176+
auto M = getLazyIRFileModule(File, Err, Ctx);
175177
if (M)
176178
return std::move(M);
177179
return createStringError(Err.getMessage());
@@ -211,16 +213,16 @@ Expected<SmallVector<std::string>> getSYCLDeviceLibs(const ArgList &Args) {
211213
/// 3. Link all the images gathered in Step 2 with the output of Step 1 using
212214
/// linkInModule API. LinkOnlyNeeded flag is used.
213215
Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
214-
const ArgList &Args, LLVMContext &C) {
216+
const ArgList &Args, LLVMContext &Ctx) {
215217
llvm::TimeTraceScope TimeScope("SYCL link device code");
216218

217219
assert(InputFiles.size() && "No inputs to link");
218220

219-
auto LinkerOutput = std::make_unique<Module>("sycl-device-link", C);
221+
auto LinkerOutput = std::make_unique<Module>("sycl-device-link", Ctx);
220222
Linker L(*LinkerOutput);
221223
// Link SYCL device input files.
222224
for (auto &File : InputFiles) {
223-
auto ModOrErr = getBitcodeModule(File, C);
225+
auto ModOrErr = getBitcodeModule(File, Ctx);
224226
if (!ModOrErr)
225227
return ModOrErr.takeError();
226228
if (L.linkInModule(std::move(*ModOrErr)))
@@ -235,7 +237,7 @@ Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
235237
// Link in SYCL device library files.
236238
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
237239
for (auto &File : *SYCLDeviceLibFiles) {
238-
auto LibMod = getBitcodeModule(File, C);
240+
auto LibMod = getBitcodeModule(File, Ctx);
239241
if (!LibMod)
240242
return LibMod.takeError();
241243
if ((*LibMod)->getTargetTriple() == Triple) {
@@ -278,18 +280,18 @@ Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
278280
/// Converts 'File' from LLVM bitcode to SPIR-V format using SPIR-V backend.
279281
/// 'Args' encompasses all arguments required for linking device code and will
280282
/// be parsed to generate options required to be passed into the backend.
281-
static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
282-
LLVMContext &C) {
283+
static Error runSPIRVCodeGen(StringRef File, const ArgList &Args,
284+
StringRef OutputFile, LLVMContext &Ctx) {
283285
llvm::TimeTraceScope TimeScope("SPIR-V code generation");
284286

285287
// Parse input module.
286-
SMDiagnostic Err;
287-
std::unique_ptr<Module> M = parseIRFile(File, Err, C);
288+
SMDiagnostic E;
289+
std::unique_ptr<Module> M = parseIRFile(File, E, Ctx);
288290
if (!M)
289-
return createStringError(Err.getMessage());
291+
return createStringError(E.getMessage());
290292

291293
if (Error Err = M->materializeAll())
292-
return std::move(Err);
294+
return Err;
293295

294296
Triple TargetTriple(Args.getLastArgValue(OPT_triple_EQ));
295297
M->setTargetTriple(TargetTriple);
@@ -333,7 +335,7 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
333335
errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File,
334336
OutputFile);
335337

336-
return OutputFile;
338+
return Error::success();
337339
}
338340

339341
/// Performs the following steps:
@@ -342,17 +344,61 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
342344
Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
343345
llvm::TimeTraceScope TimeScope("SYCL device linking");
344346

345-
LLVMContext C;
347+
LLVMContext Ctx;
346348

347349
// Link all input bitcode files and SYCL device library files, if any.
348-
auto LinkedFile = linkDeviceCode(Files, Args, C);
350+
auto LinkedFile = linkDeviceCode(Files, Args, Ctx);
349351
if (!LinkedFile)
350352
reportError(LinkedFile.takeError());
351353

354+
// TODO: SYCL post link functionality involves device code splitting and will
355+
// result in multiple bitcode files.
356+
// The following lines are placeholders to represent multiple files and will
357+
// be refactored once SYCL post link support is available.
358+
SmallVector<std::string> SplitModules;
359+
SplitModules.emplace_back(*LinkedFile);
360+
352361
// SPIR-V code generation step.
353-
auto SPVFile = runSPIRVCodeGen(*LinkedFile, Args, C);
354-
if (!SPVFile)
355-
return SPVFile.takeError();
362+
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
363+
auto Stem = OutputFile.rsplit('.').first;
364+
std::string SPVFile(Stem);
365+
SPVFile.append("_" + utostr(I) + ".spv");
366+
auto Err = runSPIRVCodeGen(SplitModules[I], Args, SPVFile, Ctx);
367+
if (Err)
368+
return std::move(Err);
369+
SplitModules[I] = SPVFile;
370+
}
371+
372+
// Write the final output into file.
373+
int FD = -1;
374+
if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
375+
return errorCodeToError(EC);
376+
llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true);
377+
378+
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
379+
auto File = SplitModules[I];
380+
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
381+
llvm::MemoryBuffer::getFileOrSTDIN(File);
382+
if (std::error_code EC = FileOrErr.getError()) {
383+
if (DryRun)
384+
FileOrErr = MemoryBuffer::getMemBuffer("");
385+
else
386+
return createFileError(File, EC);
387+
}
388+
OffloadingImage TheImage{};
389+
TheImage.TheImageKind = IMG_Object;
390+
TheImage.TheOffloadKind = OFK_SYCL;
391+
TheImage.StringData["triple"] =
392+
Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ));
393+
TheImage.StringData["arch"] =
394+
Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ));
395+
TheImage.Image = std::move(*FileOrErr);
396+
397+
llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage);
398+
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
399+
return createStringError("Offload binary has invalid size alignment");
400+
FS << Buffer;
401+
}
356402
return Error::success();
357403
}
358404

@@ -394,7 +440,7 @@ int main(int argc, char **argv) {
394440
DryRun = Args.hasArg(OPT_dry_run);
395441
SaveTemps = Args.hasArg(OPT_save_temps);
396442

397-
OutputFile = "a.spv";
443+
OutputFile = "a.out";
398444
if (Args.hasArg(OPT_o))
399445
OutputFile = Args.getLastArgValue(OPT_o);
400446

Diff for: llvm/include/llvm/Object/OffloadBinary.h

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ enum OffloadKind : uint16_t {
3535
OFK_OpenMP,
3636
OFK_Cuda,
3737
OFK_HIP,
38+
OFK_SYCL,
3839
OFK_LAST,
3940
};
4041

Diff for: llvm/lib/Object/OffloadBinary.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ OffloadKind object::getOffloadKind(StringRef Name) {
301301
.Case("openmp", OFK_OpenMP)
302302
.Case("cuda", OFK_Cuda)
303303
.Case("hip", OFK_HIP)
304+
.Case("sycl", OFK_SYCL)
304305
.Default(OFK_None);
305306
}
306307

@@ -312,6 +313,8 @@ StringRef object::getOffloadKindName(OffloadKind Kind) {
312313
return "cuda";
313314
case OFK_HIP:
314315
return "hip";
316+
case OFK_SYCL:
317+
return "sycl";
315318
default:
316319
return "none";
317320
}

0 commit comments

Comments
 (0)