-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[Offload]: Skip copying of unused kernel-mapped data #124723
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
6e42d52
12561c4
10860d1
02f78ec
153b3f9
de77e4b
050c5e3
99bd34b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1197,6 +1197,97 @@ class PrivateArgumentManagerTy { | |
} | ||
}; | ||
|
||
/// Try to determine if kernel argument is unused. This method | ||
/// takes a conservative approach, i.e. it may return false | ||
/// negatives but it should never return a false positive. | ||
static bool isArgUnused(tgt_map_type ArgType) { | ||
bool IsArgUnused = ArgType == OMP_TGT_MAPTYPE_NONE || | ||
ArgType == OMP_TGT_MAPTYPE_FROM || | ||
ArgType == OMP_TGT_MAPTYPE_TO || | ||
ArgType == (OMP_TGT_MAPTYPE_FROM | OMP_TGT_MAPTYPE_TO); | ||
return IsArgUnused; | ||
} | ||
|
||
/// Try to find redundant mappings associated with a kernel launch, | ||
/// and provide a masked version of the kernel argument types that | ||
/// avoid redundant data transfers between the host and the device. | ||
static std::unique_ptr<int64_t[]> maskRedundantTransfers(DeviceTy &Device, int32_t ArgNum, | ||
int64_t *ArgTypes, int64_t *ArgSizes, | ||
map_var_info_t *ArgNames, void **ArgPtrs, | ||
void **ArgMappers) { | ||
std::unique_ptr<int64_t[]> ArgTypesOverride = std::make_unique<int64_t[]>(ArgNum); | ||
|
||
bool AllArgsUnused = true; | ||
|
||
for (int32_t I = 0; I < ArgNum; ++I) { | ||
bool IsCustomMapped = ArgMappers && ArgMappers[I]; | ||
|
||
if (IsCustomMapped) { | ||
ArgTypesOverride[I] = ArgTypes[I]; | ||
AllArgsUnused = false; | ||
continue; | ||
} | ||
|
||
tgt_map_type ArgType = (tgt_map_type) ArgTypes[I]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clang format the commit. (sth like |
||
|
||
bool IsArgUnused = false; | ||
|
||
// Check for unused `map(buf[0:size])` mappings | ||
IsArgUnused |= isArgUnused(ArgType); | ||
|
||
bool IsArgMemberPtr = ArgType & OMP_TGT_MAPTYPE_MEMBER_OF && | ||
ArgType & OMP_TGT_MAPTYPE_PTR_AND_OBJ; | ||
|
||
tgt_map_type ArgTypeMemberPtrMasked = | ||
(tgt_map_type)(ArgType & ~(OMP_TGT_MAPTYPE_MEMBER_OF | | ||
OMP_TGT_MAPTYPE_PTR_AND_OBJ)); | ||
|
||
// Check for unused `map(wrapper.buf[0:size])` mappings | ||
IsArgUnused |= AllArgsUnused && IsArgMemberPtr && | ||
isArgUnused(ArgTypeMemberPtrMasked); | ||
Comment on lines
+1246
to
+1247
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at AllArgsUnused here seems surprising. I expect you need to check the object this is a member of? I believe they are sorted, hence, it is always the last argument. Looking at all causes weird ordering interactions and prevents removal easily. |
||
|
||
if (!IsArgUnused) { | ||
ArgTypesOverride[I] = ArgTypes[I]; | ||
AllArgsUnused = false; | ||
continue; | ||
} | ||
|
||
MappingInfoTy &MappingInfo = Device.getMappingInfo(); | ||
MappingInfoTy::HDTTMapAccessorTy HDTTMap = | ||
MappingInfo.HostDataToTargetMap.getExclusiveAccessor(); | ||
|
||
bool IsExistingMapping = | ||
!MappingInfo.lookupMapping(HDTTMap, ArgPtrs[I], ArgSizes[I]) | ||
.isEmpty(); | ||
|
||
if (IsExistingMapping) { | ||
ArgTypesOverride[I] = ArgTypes[I]; | ||
AllArgsUnused = false; | ||
continue; | ||
} | ||
|
||
[[maybe_unused]] const std::string Name = | ||
ArgNames && ArgNames[I] ? getNameFromMapping(ArgNames[I]) | ||
: std::string("unknown"); | ||
|
||
bool IsArgFrom = ArgType & OMP_TGT_MAPTYPE_FROM; | ||
bool IsArgTo = ArgType & OMP_TGT_MAPTYPE_TO; | ||
|
||
[[maybe_unused]] const char *Type = IsArgFrom && IsArgTo ? "tofrom" | ||
: IsArgFrom ? "from" | ||
: IsArgTo ? "to" | ||
: "unknown"; | ||
|
||
INFO(OMP_INFOTYPE_REDUNDANT_TRANSFER, Device.DeviceID, "%s(%s)[%" PRId64 "] %s\n", Type, | ||
Name.c_str(), ArgSizes[I], "is not used and will not be copied"); | ||
|
||
ArgTypesOverride[I] = | ||
ArgType & ~(OMP_TGT_MAPTYPE_TO | OMP_TGT_MAPTYPE_FROM); | ||
} | ||
|
||
return ArgTypesOverride; | ||
} | ||
|
||
/// Process data before launching the kernel, including calling targetDataBegin | ||
/// to map and transfer data to target device, transferring (first-)private | ||
/// variables. | ||
|
@@ -1417,11 +1508,16 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, | |
|
||
int NumClangLaunchArgs = KernelArgs.NumArgs; | ||
int Ret = OFFLOAD_SUCCESS; | ||
|
||
std::unique_ptr<int64_t[]> ArgTypesOverride = | ||
maskRedundantTransfers(Device, NumClangLaunchArgs, KernelArgs.ArgTypes, | ||
KernelArgs.ArgSizes, KernelArgs.ArgNames, KernelArgs.ArgPtrs, KernelArgs.ArgMappers); | ||
|
||
if (NumClangLaunchArgs) { | ||
// Process data, such as data mapping, before launching the kernel | ||
Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs, | ||
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs, | ||
KernelArgs.ArgSizes, KernelArgs.ArgTypes, | ||
KernelArgs.ArgSizes, ArgTypesOverride.get(), | ||
KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs, | ||
TgtOffsets, PrivateArgumentManager, AsyncInfo); | ||
if (Ret != OFFLOAD_SUCCESS) { | ||
|
@@ -1473,7 +1569,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, | |
// variables | ||
Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs, | ||
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs, | ||
KernelArgs.ArgSizes, KernelArgs.ArgTypes, | ||
KernelArgs.ArgSizes, ArgTypesOverride.get(), | ||
KernelArgs.ArgNames, KernelArgs.ArgMappers, | ||
PrivateArgumentManager, AsyncInfo); | ||
if (Ret != OFFLOAD_SUCCESS) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// clang-format off | ||
// RUN: %libomptarget-compilexx-generic | ||
// RUN: env LIBOMPTARGET_INFO=160 %libomptarget-run-generic 2>&1 \ | ||
// RUN: | %fcheck-generic | ||
|
||
// REQUIRES: gpu | ||
// clang-format on | ||
|
||
int main() { | ||
float DataStack = 0; | ||
|
||
// CHECK: omptarget device 0 info: Copying data from device to host, | ||
// TgtPtr=0x{{.*}}, HstPtr=0x{{.*}}, Size=4, Name=unknown | ||
#pragma omp target map(from : DataStack) | ||
{ | ||
DataStack = 1; | ||
} | ||
|
||
// CHECK: omptarget device 0 info: Copying data from host to device, | ||
// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=4, Name=unknown | ||
#pragma omp target map(always to : DataStack) | ||
; | ||
|
||
// CHECK: omptarget device 0 info: tofrom(unknown)[4] is not used and will not | ||
// be copied | ||
#pragma omp target map(tofrom : DataStack) | ||
; | ||
|
||
int Size = 16; | ||
double *Data = new double[Size]; | ||
|
||
// CHECK: omptarget device 0 info: Copying data from host to device, | ||
// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=8, Name=unknown CHECK: omptarget | ||
// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, | ||
// HstPtr=0x{{.*}}, Size=8, Name=unknown | ||
#pragma omp target map(tofrom : Data[0 : 1]) | ||
{ | ||
Data[0] = 1; | ||
} | ||
|
||
// CHECK: omptarget device 0 info: Copying data from host to device, | ||
// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=16, Name=unknown CHECK: omptarget | ||
// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, | ||
// HstPtr=0x{{.*}}, Size=16, Name=unknown | ||
#pragma omp target map(always tofrom : Data[0 : 2]) | ||
; | ||
|
||
// CHECK: omptarget device 0 info: from(unknown)[24] is not used and will not be | ||
// copied | ||
#pragma omp target map(from : Data[0 : 3]) | ||
; | ||
|
||
// CHECK: omptarget device 0 info: to(unknown)[24] is not used and will not be | ||
// copied | ||
#pragma omp target map(to : Data[0 : 3]) | ||
; | ||
|
||
// CHECK: omptarget device 0 info: tofrom(unknown)[32] is not used and will not | ||
// be copied | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file got formatted which breaks the checks. |
||
#pragma omp target map(tofrom : Data[0 : 4]) | ||
; | ||
|
||
// CHECK: omptarget device 0 info: Copying data from host to device, | ||
// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=40, Name=unknown | ||
#pragma omp target map(to : Data[0 : 5]) | ||
{ | ||
#pragma omp teams | ||
Data[0] = 1; | ||
} | ||
|
||
struct { | ||
double *Data; | ||
} Wrapper{.Data = Data}; | ||
|
||
// CHECK: omptarget device 0 info: Copying data from host to device, | ||
// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=48, Name=unknown CHECK: omptarget | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here the check lines are completely jumbled. |
||
// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, | ||
// HstPtr=0x{{.*}}, Size=48, Name=unknown | ||
#pragma omp target map(tofrom : Wrapper.Data[0 : 6]) | ||
{ | ||
Wrapper.Data[0] = 1; | ||
} | ||
|
||
// CHECK: omptarget device 0 info: unknown(unknown)[8] is not used and will not be copied | ||
// CHECK: omptarget device 0 info: tofrom(unknown)[56] is not used and will not be copied | ||
#pragma omp target map(tofrom: Wrapper.Data[0:7]) | ||
; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This deserves some more explanation. Why are these arg types "unused". Basically explain that they have no explicit or implicit variable use in the region.