Skip to content

Commit 540bf13

Browse files
authored
Merge branch 'main' into fix/container-memory
2 parents f58351e + ec8979d commit 540bf13

File tree

12 files changed

+66
-28
lines changed

12 files changed

+66
-28
lines changed

.github/workflows/style.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ jobs:
1212
- uses: actions/checkout@master
1313
- uses: reviewdog/action-cpplint@master
1414
with:
15-
github_token: ${{ secrets.github_token }}
15+
github_token: ${{ secrets.github_token }}
16+
args: --linelength=120

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ if(GIT_FOUND)
7070
#string(REPLACE "." "_" GIT_BRANCH ${GIT_BRANCH})
7171
string(REPLACE "-" "_" GIT_BRANCH ${GIT_BRANCH})
7272
string(REPLACE "/" "_" GIT_BRANCH ${GIT_BRANCH})
73-
message(STATUS "Git formated branch is ${GIT_BRANCH}")
73+
message(STATUS "Git formatted branch is ${GIT_BRANCH}")
7474
endif()
7575

7676
# Generate the static config header file

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ English | [中文](README_CN.md)
44

55
## Introduction
66

7-
HAMi-core is the in-container gpu resource controller, it has beed adopted by [HAMi](https://github.com/Project-HAMi/HAMi), [volcano](https://github.com/volcano-sh/devices)
7+
HAMi-core is the in-container gpu resource controller, it has been adopted by [HAMi](https://github.com/Project-HAMi/HAMi), [volcano](https://github.com/volcano-sh/devices)
88

99
<img src="./docs/images/hami-arch.png" width = "600" />
1010

1111
## Features
1212

1313
HAMi-core has the following features:
14-
1. Virtualize device meory
14+
1. Virtualize device memory
1515
2. Limit device utilization by self-implemented time shard
1616
3. Real-time device utilization monitor
1717

src/cuda/hook.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ extern fp_dlsym real_dlsym;
1010
cuda_entry_t cuda_library_entry[] = {
1111
/* Init Part */
1212
{.name = "cuInit"},
13-
/* Deivce Part */
13+
/* Device Part */
1414
{.name = "cuDeviceGetAttribute"},
1515
{.name = "cuDeviceGet"},
1616
{.name = "cuDeviceGetCount"},
@@ -111,6 +111,7 @@ cuda_entry_t cuda_library_entry[] = {
111111
{.name = "cuFuncGetAttribute"},
112112
{.name = "cuFuncSetAttribute"},
113113
{.name = "cuLaunchKernel"},
114+
{.name = "cuLaunchKernelEx"},
114115
{.name = "cuLaunchCooperativeKernel"},
115116
/* cuEvent Part */
116117
{.name = "cuEventCreate"},
@@ -134,7 +135,7 @@ cuda_entry_t cuda_library_entry[] = {
134135
{.name = "cuMemCreate"},
135136
{.name = "cuMemRelease"},
136137
{.name = "cuMemMap"},
137-
{.name = "cucuMemImportFromShareableHandle"},
138+
{.name = "cuMemImportFromShareableHandle"},
138139
{.name = "cuMemAllocAsync"},
139140
{.name = "cuMemFreeAsync"},
140141
/* cuda11.7 new api memory part */
@@ -330,7 +331,7 @@ void *find_symbols_in_table_by_cudaversion(const char *symbol,int cudaVersion)
330331
const char *real_symbol;
331332
real_symbol = get_real_func_name(symbol,cudaVersion);
332333
if (real_symbol == NULL) {
333-
// if not find in mulit func version def, use origin logic
334+
// if not find in multi func version def, use origin logic
334335
pfn = find_symbols_in_table(symbol);
335336
} else {
336337
pfn = find_real_symbols_in_table(real_symbol);

src/cuda/memory.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ uint64_t compute_3d_array_alloc_bytes(const CUDA_ARRAY3D_DESCRIPTOR* desc) {
6868
}
6969
bytes *= cuarray_format_bytes[desc->Format];
7070

71-
// TODO: take acount of alignment and etc
71+
// TODO: take account of alignment and etc
7272
// bytes ++ ???
7373
return bytes;
7474
}
@@ -87,7 +87,7 @@ uint64_t compute_array_alloc_bytes(const CUDA_ARRAY_DESCRIPTOR* desc) {
8787
}
8888
bytes *= cuarray_format_bytes[desc->Format];
8989

90-
// TODO: take acount of alignment and etc
90+
// TODO: take account of alignment and etc
9191
// bytes ++ ???
9292
return bytes;
9393
}
@@ -118,7 +118,7 @@ CUresult cuArrayCreate_v2(CUarray* arr, const CUDA_ARRAY_DESCRIPTOR* desc) {
118118

119119
CUresult cuArrayDestroy(CUarray arr) {
120120
CUDA_ARRAY3D_DESCRIPTOR desc;
121-
LOG_DEBUG("cuArrayDestory");
121+
LOG_DEBUG("cuArrayDestroy");
122122
CHECK_DRV_API(cuArray3DGetDescriptor(&desc, arr));
123123
/*uint64_t bytes*/
124124
compute_3d_array_alloc_bytes(&desc);
@@ -281,14 +281,14 @@ CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount ){
281281
}
282282

283283
CUresult cuPointerGetAttribute ( void* data, CUpointer_attribute attribute, CUdeviceptr ptr ){
284-
LOG_DEBUG("cuPointGetAttribue data=%p attribute=%d ptr=%llx",data,(int)attribute,ptr);
284+
LOG_DEBUG("cuPointGetAttribute data=%p attribute=%d ptr=%llx", data, (int)attribute,ptr);
285285
ENSURE_RUNNING();
286286
CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttribute,data,attribute,ptr);
287287
return res;
288288
}
289289

290290
CUresult cuPointerGetAttributes ( unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr ) {
291-
LOG_DEBUG("cuPointGetAttribue data=%p ptr=%llx",data,ptr);
291+
LOG_DEBUG("cuPointGetAttribute data=%p ptr=%llx", data, ptr);
292292
ENSURE_RUNNING();
293293
CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerGetAttributes,numAttributes,attributes,data,ptr);
294294
int cur=0;
@@ -307,7 +307,7 @@ CUresult cuPointerGetAttributes ( unsigned int numAttributes, CUpointer_attribu
307307
}
308308

309309
CUresult cuPointerSetAttribute ( const void* value, CUpointer_attribute attribute, CUdeviceptr ptr ){
310-
LOG_DEBUG("cuPointSetAttribue value=%p attribute=%d ptr=%llx",value,(int)attribute,ptr);
310+
LOG_DEBUG("cuPointSetAttribute value=%p attribute=%d ptr=%llx", value, (int)attribute, ptr);
311311
ENSURE_RUNNING();
312312
CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuPointerSetAttribute,value,attribute,ptr);
313313
return res;
@@ -542,7 +542,7 @@ CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle,
542542

543543
CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) {
544544
// TODO: compute bytesize
545-
LOG_DEBUG("cuMipmappedArrayDestory\n");
545+
LOG_DEBUG("cuMipmappedArrayDestroy\n");
546546
CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMipmappedArrayDestroy, hMipmappedArray);
547547
return res;
548548
}
@@ -558,6 +558,16 @@ CUresult cuLaunchKernel ( CUfunction f, unsigned int gridDimX, unsigned int gr
558558
return res;
559559
}
560560

561+
/*
 * Hook for the driver entry point cuLaunchKernelEx.
 *
 * Runs ENSURE_RUNNING() and pre_launch_kernel(); when pidfound is 1 it hands
 * rate_limiter() the product of the three grid dimensions and the product of
 * the three block dimensions read from config. All four arguments are then
 * forwarded unchanged through CUDA_OVERRIDE_CALL and its result is returned.
 *
 * config       launch configuration; only gridDim{X,Y,Z} and blockDim{X,Y,Z}
 *              are read here. May be NULL, in which case rate limiting is
 *              skipped and the call is still forwarded.
 * f            kernel handle, passed through.
 * kernelParams kernel parameter array, passed through.
 * extra        extra launch options, passed through.
 */
CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, void **extra) {
    ENSURE_RUNNING();
    pre_launch_kernel();
    /* Do not dereference a NULL config inside the hook: skip the limiter and
     * let the forwarded call decide how to report the bad argument
     * (presumably an error code from the driver -- confirm against the
     * CUDA driver API documentation). */
    if (pidfound == 1 && config) {
        rate_limiter(config->gridDimX * config->gridDimY * config->gridDimZ,
                     config->blockDimX * config->blockDimY * config->blockDimZ);
    }
    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuLaunchKernelEx, config, f, kernelParams, extra);
    return res;
}
561571

562572
CUresult cuLaunchCooperativeKernel ( CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams ){
563573
ENSURE_RUNNING();

src/cuda/stream.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ CUresult cuStreamCreate(CUstream *phstream, unsigned int flags){
77
}
88

99
CUresult cuStreamDestroy_v2 ( CUstream hStream ){
10-
LOG_DEBUG("cuStreamDestory_v2 %p",hStream);
10+
LOG_DEBUG("cuStreamDestroy_v2 %p",hStream);
1111
return CUDA_OVERRIDE_CALL(cuda_library_entry,cuStreamDestroy_v2,hStream);
1212
}
1313

src/include/libcuda_hook.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@ typedef CUresult (*cuda_sym_t)();
3434
({ \
3535
LOG_DEBUG("Hijacking %s", #sym); \
3636
cuda_sym_t _entry = (cuda_sym_t)CUDA_FIND_ENTRY(table, sym); \
37+
if (_entry == NULL) { \
38+
LOG_ERROR("Hijack failed: %s is NULL", #sym); \
39+
} \
3740
_entry(__VA_ARGS__); \
3841
})
3942

4043
typedef enum {
4144
/* cuInit Part */
4245
CUDA_OVERRIDE_ENUM(cuInit),
43-
/* cuDeivce Part */
46+
/* cuDevice Part */
4447
CUDA_OVERRIDE_ENUM(cuDeviceGetAttribute),
4548
CUDA_OVERRIDE_ENUM(cuDeviceGet),
4649
CUDA_OVERRIDE_ENUM(cuDeviceGetCount),
@@ -142,6 +145,7 @@ typedef enum {
142145
CUDA_OVERRIDE_ENUM(cuFuncGetAttribute),
143146
CUDA_OVERRIDE_ENUM(cuFuncSetAttribute),
144147
CUDA_OVERRIDE_ENUM(cuLaunchKernel),
148+
CUDA_OVERRIDE_ENUM(cuLaunchKernelEx),
145149
CUDA_OVERRIDE_ENUM(cuLaunchCooperativeKernel),
146150
/* cuEvent Part */
147151
CUDA_OVERRIDE_ENUM(cuEventCreate),

src/include/nvml-subset.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ typedef enum nvmlGpuP2PCapsIndex_enum {
655655
/* P2P Capability Index Status*/
656656
typedef enum nvmlGpuP2PStatus_enum {
657657
NVML_P2P_STATUS_OK = 0,
658-
NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED,
658+
NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED,
659659
NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
660660
NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
661661
NVML_P2P_STATUS_DISABLED_BY_REGKEY,
@@ -808,7 +808,7 @@ typedef struct nvmlVgpuPgpuMetadata_st {
808808
char hostDriverVersion
809809
[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Host driver version
810810
unsigned int
811-
pgpuVirtualizationCaps; //!< Pgpu virtualizaion capabilities bitfileld
811+
pgpuVirtualizationCaps; //!< Pgpu virtualization capabilities bitfield
812812
unsigned int reserved[7]; //!< Reserved for internal use
813813
unsigned int opaqueDataSize; //!< Size of opaque data field in bytes
814814
char opaqueData[4]; //!< Opaque data
@@ -966,7 +966,7 @@ typedef enum nvmlVgpuVmCompatibility_enum {
966966
NVML_VGPU_VM_COMPATIBILITY_HIBERNATE =
967967
0x2, //!< vGPU is runnable from a hibernated state (ACPI S4)
968968
NVML_VGPU_VM_COMPATIBILITY_SLEEP =
969-
0x4, //!< vGPU is runnable from a sleeped state (ACPI S3)
969+
0x4, //!< vGPU is runnable from a slept state (ACPI S3)
970970
NVML_VGPU_VM_COMPATIBILITY_LIVE =
971971
0x8, //!< vGPU is runnable from a live/paused (ACPI S0)
972972
} nvmlVgpuVmCompatibility_t;
@@ -1093,7 +1093,7 @@ typedef enum nvmlVgpuVmIdType {
10931093
* Represents frame buffer capture session type
10941094
*/
10951095
typedef enum nvmlFBCSessionType_enum {
1096-
NVML_FBC_SESSION_TYPE_UNKNOWN = 0, //!< Unknwon
1096+
NVML_FBC_SESSION_TYPE_UNKNOWN = 0, //!< Unknown
10971097
NVML_FBC_SESSION_TYPE_TOSYS, //!< ToSys
10981098
NVML_FBC_SESSION_TYPE_CUDA, //!< Cuda
10991099
NVML_FBC_SESSION_TYPE_VID, //!< Vid

src/libvgpu.c

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,23 @@ FUNC_ATTR_VISIBLE void* dlsym(void* handle, const char* symbol) {
7474
LOG_DEBUG("into dlsym %s",symbol);
7575
pthread_once(&dlsym_init_flag,init_dlsym);
7676
if (real_dlsym == NULL) {
77-
real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5");
77+
const char* glibc_versions[] = {
78+
"GLIBC_2.2.5", // for amd64
79+
"GLIBC_2.17", // for arm64
80+
"GLIBC_2.3",
81+
"GLIBC_2.4",
82+
"GLIBC_2.10",
83+
"GLIBC_2.18",
84+
"GLIBC_2.22",
85+
NULL
86+
};
87+
for (int i = 0; glibc_versions[i] != NULL; i++) {
88+
real_dlsym = dlvsym(RTLD_NEXT, "dlsym", glibc_versions[i]);
89+
if (real_dlsym != NULL) {
90+
LOG_DEBUG("found dlsym with version: %s", glibc_versions[i]);
91+
break;
92+
}
93+
}
7894
char *path_search=getenv("CUDA_REDIRECT");
7995
if ((path_search!=NULL) && (strlen(path_search)>0)){
8096
vgpulib = dlopen(path_search,RTLD_LAZY);
@@ -203,6 +219,7 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
203219
DLSYM_HOOK_FUNC(cuFuncGetAttribute);
204220
DLSYM_HOOK_FUNC(cuFuncSetAttribute);
205221
DLSYM_HOOK_FUNC(cuLaunchKernel);
222+
DLSYM_HOOK_FUNC(cuLaunchKernelEx);
206223
DLSYM_HOOK_FUNC(cuLaunchCooperativeKernel);
207224
DLSYM_HOOK_FUNC(cuIpcOpenMemHandle_v2);
208225
DLSYM_HOOK_FUNC(cuIpcGetMemHandle);
@@ -838,7 +855,13 @@ void* __dlsym_hook_section_nvml(void* handle, const char* symbol) {
838855
void preInit(){
839856
LOG_MSG("Initializing.....");
840857
if (real_dlsym == NULL) {
841-
real_dlsym = _dl_sym(RTLD_NEXT, "dlsym", dlsym);
858+
real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5");
859+
if (real_dlsym == NULL) {
860+
LOG_ERROR("real dlsym not found");
861+
real_dlsym = _dl_sym(RTLD_NEXT, "dlsym", dlsym);
862+
if (real_dlsym == NULL)
863+
LOG_ERROR("real dlsym not found");
864+
}
842865
}
843866
real_realpath = NULL;
844867
load_cuda_libraries();
@@ -848,7 +871,7 @@ void preInit(){
848871

849872
void postInit(){
850873
allocator_init();
851-
874+
map_cuda_visible_devices();
852875
try_lock_unified_lock();
853876
nvmlReturn_t res = set_task_pid();
854877
try_unlock_unified_lock();
@@ -860,7 +883,6 @@ void postInit(){
860883
pidfound=1;
861884
}
862885

863-
map_cuda_visible_devices();
864886
//add_gpu_device_memory_usage(getpid(),0,context_size,0);
865887
env_utilization_switch = set_env_utilization_switch();
866888
init_utilization_watcher();

src/multiprocess/multiprocess_memory_limit.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ int fix_lock_shrreg() {
436436
if (current_owner != 0) {
437437
int flag = 0;
438438
if (current_owner == region_info.pid) {
439-
LOG_INFO("Detect onwer pid = self pid (%d), "
439+
LOG_INFO("Detect owner pid = self pid (%d), "
440440
"indicates pid loopback or race condition", current_owner);
441441
flag = 1;
442442
} else {
@@ -589,7 +589,7 @@ void init_proc_slot_withlock() {
589589
}
590590
signal(SIGUSR2,sig_swap_stub);
591591
signal(SIGUSR1,sig_restore_stub);
592-
// If, by any means a pid of itself is found in region->proces, then it is probably caused by crashloop
592+
// If, by any means a pid of itself is found in region->process, then it is probably caused by crashloop
593593
// we need to reset it.
594594
int i,found=0;
595595
for (i=0; i<region->proc_num; i++) {

0 commit comments

Comments
 (0)