Skip to content

Commit 5f49f35

Browse files
merge kernels in existing XCLBIN
1 parent 5241ec9 commit 5f49f35

File tree

2 files changed

+78
-23
lines changed

2 files changed

+78
-23
lines changed

compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp

+60-15
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
268268
SmallVector<uint32_t> xclbinIndices(ordinalCount);
269269
SmallVector<uint32_t> asmInstrIndices(ordinalCount);
270270

271+
SmallVector<SmallString<128>> xclbinPaths;
272+
271273
for (size_t i = 0; i < entryPointNames.size(); i++) {
272274
uint64_t ordinal = entryPointOrdinals.at(entryPointNames[i]);
273275

@@ -300,18 +302,34 @@ LogicalResult AIETargetBackend::serializeExecutable(
300302
llvm::sys::path::append(npuInstPath,
301303
entryPointNamesFb[ordinal] + ".npu.txt");
302304

303-
SmallVector<StringRef> cmdArgs{aie2xclbin,
304-
inputMlirPath,
305-
"--peano",
306-
options.peanoInstallDir,
307-
"--xclbin-name",
308-
xclbinPath,
309-
"--npu-insts-name",
310-
npuInstPath,
311-
"--xclbin-kernel-name",
312-
entryPointNamesFb[ordinal],
313-
"--tmpdir",
314-
entryPointWorkDir};
305+
// Convert ordinal to hexadecimal string for xclbin kernel id
306+
std::stringstream ss;
307+
ss << "0x" << std::hex << ordinal + 10;
308+
std::string ordinalHex = ss.str();
309+
310+
SmallVector<StringRef> cmdArgs;
311+
SmallVector<StringRef> cmdArgsBase{aie2xclbin,
312+
inputMlirPath,
313+
"--peano",
314+
options.peanoInstallDir,
315+
"--xclbin-name",
316+
xclbinPath,
317+
"--npu-insts-name",
318+
npuInstPath,
319+
"--xclbin-kernel-name",
320+
entryPointNamesFb[ordinal],
321+
"--tmpdir",
322+
entryPointWorkDir,
323+
"--xclbin-kernel-id",
324+
ordinalHex};
325+
cmdArgs = cmdArgsBase;
326+
bool AttemptingMerge = false;
327+
if (i > 0) {
328+
cmdArgs.push_back("--input-xclbin-name");
329+
cmdArgs.push_back(xclbinPaths.back());
330+
AttemptingMerge = true;
331+
}
332+
xclbinPaths.push_back(xclbinPath);
315333

316334
auto addOpt = [&](StringRef arg, bool value) {
317335
if (value) cmdArgs.push_back(arg);
@@ -350,11 +368,24 @@ LogicalResult AIETargetBackend::serializeExecutable(
350368
{
351369
SmallVector<StringRef> cmdEnvRefs{cmdEnv.begin(), cmdEnv.end()};
352370
int result = llvm::sys::ExecuteAndWait(cmdArgs[0], cmdArgs, cmdEnvRefs);
353-
if (result != 0)
371+
if (result != 0 && AttemptingMerge) {
372+
// we failed to create xclbin but maybe we failed because we were trying
373+
// to merge the kernel into an existing xclbin, try again to see if perhaps
374+
// we succeed if we don't try to merge.
375+
AttemptingMerge = false;
376+
result =
377+
llvm::sys::ExecuteAndWait(cmdArgsBase[0], cmdArgsBase, cmdEnvRefs);
378+
xclbinPaths.push_back(xclbinPath);
379+
}
380+
if (result != 0) {
354381
return moduleOp.emitOpError(
355382
"Failed to produce an XCLBin with external tool.");
383+
}
384+
// delete the previous xclbin if we were able to merge as the new one now
385+
// will have all the kernels from the previous one.
386+
if (AttemptingMerge) xclbinPaths.erase(xclbinPaths.end() - 2);
387+
xclbinIndices[ordinal] = xclbinPaths.size() - 1;
356388
}
357-
358389
std::ifstream instrFile(static_cast<std::string>(npuInstPath));
359390
std::string line;
360391
while (std::getline(instrFile, line)) {
@@ -369,7 +400,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
369400
asmInstrIndices[ordinal] = asmInstrRefs.size();
370401
asmInstrRefs.push_back(
371402
iree_amd_aie_hal_xrt_AsmInstDef_create(builder, npuInstrsVec));
372-
403+
/*
373404
xclbinIn = openInputFile(xclbinPath, &errorMessage);
374405
if (!xclbinIn) {
375406
moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
@@ -378,7 +409,21 @@ LogicalResult AIETargetBackend::serializeExecutable(
378409
xclbinIndices[ordinal] = xclbinRefs.size();
379410
xclbinRefs.push_back(
380411
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
412+
*/
413+
}
414+
// write out the final xclbins to flatbuffer
415+
for (auto xclbinPath : xclbinPaths) {
416+
llvm::outs() << "writing xclbin from path: " << xclbinPath << "\n";
417+
std::string errorMessage;
418+
xclbinIn = openInputFile(xclbinPath, &errorMessage);
419+
if (!xclbinIn) {
420+
moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
421+
}
422+
auto xclbinStringRef = builder.createString(xclbinIn->getBuffer());
423+
xclbinRefs.push_back(
424+
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
381425
}
426+
382427
// Serialize the executable to flatbuffer format
383428
auto entryPointsRef = builder.createStringVec(entryPointNamesFb);
384429

runtime/src/iree-amd-aie/driver/xrt/native_executable.cc

+18-8
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ iree_status_t iree_hal_xrt_native_executable_create(
128128
iree_amd_aie_hal_xrt_XclbinDef_vec_t xclbins_vec =
129129
iree_amd_aie_hal_xrt_ExecutableDef_xclbins_get(executable_def);
130130

131+
iree_host_size_t number_xclbin =
132+
iree_amd_aie_hal_xrt_XclbinDef_vec_len(xclbins_vec);
133+
131134
iree_amd_aie_hal_xrt_AsmInstDef_vec_t asm_instrs_vec =
132135
iree_amd_aie_hal_xrt_ExecutableDef_asm_instrs_get(executable_def);
133136

@@ -163,17 +166,15 @@ iree_status_t iree_hal_xrt_native_executable_create(
163166
&executable->resource);
164167
executable->host_allocator = host_allocator;
165168
executable->entry_point_count = entry_point_count;
166-
for (iree_host_size_t entry_ordinal = 0; entry_ordinal < entry_point_count;
167-
entry_ordinal++) {
168-
const char* entry_name =
169-
flatbuffers_string_vec_at(entry_points_vec, entry_ordinal);
170-
uint32_t xclbin_index =
171-
flatbuffers_uint32_vec_at(xclbin_indices_vec, entry_ordinal);
169+
// collect all the hardware contexts first as multiple entry points can map to
170+
// the same context and this way we don't need to keep reloading them.
171+
std::vector<xrt::hw_context> contexts;
172+
for (iree_host_size_t xclbin_index = 0; xclbin_index < number_xclbin;
173+
xclbin_index++) {
172174
iree_amd_aie_hal_xrt_XclbinDef_table_t xclbin_def =
173175
iree_amd_aie_hal_xrt_XclbinDef_vec_at(xclbins_vec, xclbin_index);
174176
flatbuffers_string_t xclbin_fb =
175177
iree_amd_aie_hal_xrt_XclbinDef_xclbin_get(xclbin_def);
176-
177178
// XRT API needs this vector and can't actually read a void*.
178179
std::vector<char> xclbinVector(
179180
xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
@@ -186,6 +187,14 @@ iree_status_t iree_hal_xrt_native_executable_create(
186187
}
187188
device.register_xclbin(xclbin);
188189
xrt::hw_context context(device, xclbin.get_uuid());
190+
contexts.push_back(context);
191+
}
192+
for (iree_host_size_t entry_ordinal = 0; entry_ordinal < entry_point_count;
193+
entry_ordinal++) {
194+
const char* entry_name =
195+
flatbuffers_string_vec_at(entry_points_vec, entry_ordinal);
196+
uint32_t xclbin_index =
197+
flatbuffers_uint32_vec_at(xclbin_indices_vec, entry_ordinal);
189198
uint32_t asm_instr_index =
190199
flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
191200
iree_amd_aie_hal_xrt_AsmInstDef_table_t asminst_def =
@@ -196,7 +205,8 @@ iree_status_t iree_hal_xrt_native_executable_create(
196205
std::unique_ptr<xrt::kernel> kernel;
197206
std::unique_ptr<xrt::bo> instr;
198207
try {
199-
kernel = std::make_unique<xrt::kernel>(context, entry_name);
208+
kernel =
209+
std::make_unique<xrt::kernel>(contexts[xclbin_index], entry_name);
200210
// XCL_BO_FLAGS_CACHEABLE is used to indicate that this is an instruction
201211
// buffer that resides in instr_memory. This buffer is always passed as
202212
// the second argument to the kernel and we can use the

0 commit comments

Comments
 (0)