Skip to content

Commit d8b8c9f

Browse files
committed
Stub NVML ECC counters locally
1 parent f9ee3ea commit d8b8c9f

3 files changed

Lines changed: 103 additions & 99 deletions

File tree

codegen/codegen.py

Lines changed: 91 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -764,6 +764,72 @@ def nvml_device_three_args_value(
764764
)
765765

766766

767+
def nvml_server_device_two_args_stub(first_type, second_type, out_type, value_init):
768+
return f"""nvmlDevice_t _lupine_device = nullptr;
769+
{first_type} _lupine_first = {{}};
770+
{second_type} _lupine_second = {{}};
771+
if (rpc_read(conn, &_lupine_device, sizeof(_lupine_device)) < 0 ||
772+
rpc_read(conn, &_lupine_first, sizeof(_lupine_first)) < 0 ||
773+
rpc_read(conn, &_lupine_second, sizeof(_lupine_second)) < 0) {{
774+
return -1;
775+
}}
776+
int _lupine_request_id = rpc_read_end(conn);
777+
if (_lupine_request_id < 0) {{
778+
return -1;
779+
}}
780+
781+
{out_type} _lupine_value = {value_init};
782+
nvmlReturn_t _lupine_result = NVML_SUCCESS;
783+
if (rpc_write_start_response(conn, _lupine_request_id) < 0 ||
784+
rpc_write(conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
785+
rpc_write(conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
786+
rpc_write_end(conn) < 0) {{
787+
return -1;
788+
}}
789+
return 0;"""
790+
791+
792+
def nvml_server_device_three_args_stub(
793+
first_type, second_type, third_type, out_type, value_init
794+
):
795+
return f"""nvmlDevice_t _lupine_device = nullptr;
796+
{first_type} _lupine_first = {{}};
797+
{second_type} _lupine_second = {{}};
798+
{third_type} _lupine_third = {{}};
799+
if (rpc_read(conn, &_lupine_device, sizeof(_lupine_device)) < 0 ||
800+
rpc_read(conn, &_lupine_first, sizeof(_lupine_first)) < 0 ||
801+
rpc_read(conn, &_lupine_second, sizeof(_lupine_second)) < 0 ||
802+
rpc_read(conn, &_lupine_third, sizeof(_lupine_third)) < 0) {{
803+
return -1;
804+
}}
805+
int _lupine_request_id = rpc_read_end(conn);
806+
if (_lupine_request_id < 0) {{
807+
return -1;
808+
}}
809+
810+
{out_type} _lupine_value = {value_init};
811+
nvmlReturn_t _lupine_result = NVML_SUCCESS;
812+
if (rpc_write_start_response(conn, _lupine_request_id) < 0 ||
813+
rpc_write(conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
814+
rpc_write(conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
815+
rpc_write_end(conn) < 0) {{
816+
return -1;
817+
}}
818+
return 0;"""
819+
820+
821+
def nvml_ecc_counter_stub(name, params, value_name, value_init, server_body):
822+
nvml_codegen_function(
823+
name,
824+
params,
825+
f"""if ({value_name} != nullptr) {{
826+
*{value_name} = {value_init};
827+
}}
828+
return NVML_SUCCESS;""",
829+
server_body,
830+
)
831+
832+
767833
def nvml_device_two_values(name, first_type, first_name, second_type, second_name):
768834
nvml_codegen_function(
769835
name,
@@ -849,34 +915,39 @@ def nvml_device_two_values(name, first_type, first_name, second_type, second_nam
849915
"nvmlPciInfo_t",
850916
"pci",
851917
)
852-
nvml_device_two_args_value(
918+
nvml_ecc_counter_stub(
853919
"nvmlDeviceGetTotalEccErrors",
854-
"nvmlMemoryErrorType_t",
855-
"errorType",
856-
"nvmlEccCounterType_t",
857-
"counterType",
858-
"unsigned long long",
920+
"nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, unsigned long long *eccCounts",
859921
"eccCounts",
922+
"0",
923+
nvml_server_device_two_args_stub(
924+
"nvmlMemoryErrorType_t", "nvmlEccCounterType_t", "unsigned long long", "0"
925+
),
860926
)
861-
nvml_device_two_args_value(
927+
nvml_ecc_counter_stub(
862928
"nvmlDeviceGetDetailedEccErrors",
863-
"nvmlMemoryErrorType_t",
864-
"errorType",
865-
"nvmlEccCounterType_t",
866-
"counterType",
867-
"nvmlEccErrorCounts_t",
929+
"nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts",
868930
"eccCounts",
931+
"nvmlEccErrorCounts_t{}",
932+
nvml_server_device_two_args_stub(
933+
"nvmlMemoryErrorType_t",
934+
"nvmlEccCounterType_t",
935+
"nvmlEccErrorCounts_t",
936+
"nvmlEccErrorCounts_t{}",
937+
),
869938
)
870-
nvml_device_three_args_value(
939+
nvml_ecc_counter_stub(
871940
"nvmlDeviceGetMemoryErrorCounter",
872-
"nvmlMemoryErrorType_t",
873-
"errorType",
874-
"nvmlEccCounterType_t",
875-
"counterType",
876-
"nvmlMemoryLocation_t",
877-
"locationType",
878-
"unsigned long long",
941+
"nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlMemoryLocation_t locationType, unsigned long long *count",
879942
"count",
943+
"0",
944+
nvml_server_device_three_args_stub(
945+
"nvmlMemoryErrorType_t",
946+
"nvmlEccCounterType_t",
947+
"nvmlMemoryLocation_t",
948+
"unsigned long long",
949+
"0",
950+
),
880951
)
881952
nvml_device_two_values(
882953
"nvmlDeviceGetEccMode",

codegen/gen_nvml_client.inc

Lines changed: 6 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -758,67 +758,24 @@ extern "C" nvmlReturn_t nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device,
758758
}
759759

760760
extern "C" nvmlReturn_t nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, unsigned long long *eccCounts) {
761-
conn_t *_lupine_conn = connection_for_device(&device);
762-
nvmlReturn_t _lupine_result = rpc_error();
763-
unsigned long long _lupine_value = {};
764-
if (_lupine_conn == nullptr ||
765-
rpc_write_start_request(_lupine_conn, RPC_nvmlDeviceGetTotalEccErrors) < 0 ||
766-
rpc_write(_lupine_conn, &device, sizeof(device)) < 0 ||
767-
rpc_write(_lupine_conn, &errorType, sizeof(errorType)) < 0 ||
768-
rpc_write(_lupine_conn, &counterType, sizeof(counterType)) < 0 ||
769-
rpc_wait_for_response(_lupine_conn) < 0 ||
770-
rpc_read(_lupine_conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
771-
rpc_read(_lupine_conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
772-
rpc_read_end(_lupine_conn) < 0) {
773-
return rpc_error();
774-
}
775761
if (eccCounts != nullptr) {
776-
*eccCounts = _lupine_value;
762+
*eccCounts = 0;
777763
}
778-
return _lupine_result;
764+
return NVML_SUCCESS;
779765
}
780766

781767
extern "C" nvmlReturn_t nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts) {
782-
conn_t *_lupine_conn = connection_for_device(&device);
783-
nvmlReturn_t _lupine_result = rpc_error();
784-
nvmlEccErrorCounts_t _lupine_value = {};
785-
if (_lupine_conn == nullptr ||
786-
rpc_write_start_request(_lupine_conn, RPC_nvmlDeviceGetDetailedEccErrors) < 0 ||
787-
rpc_write(_lupine_conn, &device, sizeof(device)) < 0 ||
788-
rpc_write(_lupine_conn, &errorType, sizeof(errorType)) < 0 ||
789-
rpc_write(_lupine_conn, &counterType, sizeof(counterType)) < 0 ||
790-
rpc_wait_for_response(_lupine_conn) < 0 ||
791-
rpc_read(_lupine_conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
792-
rpc_read(_lupine_conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
793-
rpc_read_end(_lupine_conn) < 0) {
794-
return rpc_error();
795-
}
796768
if (eccCounts != nullptr) {
797-
*eccCounts = _lupine_value;
769+
*eccCounts = nvmlEccErrorCounts_t{};
798770
}
799-
return _lupine_result;
771+
return NVML_SUCCESS;
800772
}
801773

802774
extern "C" nvmlReturn_t nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlMemoryLocation_t locationType, unsigned long long *count) {
803-
conn_t *_lupine_conn = connection_for_device(&device);
804-
nvmlReturn_t _lupine_result = rpc_error();
805-
unsigned long long _lupine_value = {};
806-
if (_lupine_conn == nullptr ||
807-
rpc_write_start_request(_lupine_conn, RPC_nvmlDeviceGetMemoryErrorCounter) < 0 ||
808-
rpc_write(_lupine_conn, &device, sizeof(device)) < 0 ||
809-
rpc_write(_lupine_conn, &errorType, sizeof(errorType)) < 0 ||
810-
rpc_write(_lupine_conn, &counterType, sizeof(counterType)) < 0 ||
811-
rpc_write(_lupine_conn, &locationType, sizeof(locationType)) < 0 ||
812-
rpc_wait_for_response(_lupine_conn) < 0 ||
813-
rpc_read(_lupine_conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
814-
rpc_read(_lupine_conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
815-
rpc_read_end(_lupine_conn) < 0) {
816-
return rpc_error();
817-
}
818775
if (count != nullptr) {
819-
*count = _lupine_value;
776+
*count = 0;
820777
}
821-
return _lupine_result;
778+
return NVML_SUCCESS;
822779
}
823780

824781
extern "C" nvmlReturn_t nvmlDeviceGetEccMode(nvmlDevice_t device, nvmlEnableState_t *current, nvmlEnableState_t *pending) {

codegen/gen_nvml_server.inc

Lines changed: 6 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1097,16 +1097,8 @@ int handle_nvmlDeviceGetTotalEccErrors(conn_t *conn) {
10971097
return -1;
10981098
}
10991099

1100-
unsigned long long _lupine_value = {};
1101-
using _lupine_fn_t =
1102-
nvmlReturn_t (*)(nvmlDevice_t, nvmlMemoryErrorType_t, nvmlEccCounterType_t, unsigned long long *);
1103-
_lupine_fn_t _lupine_fn = nvml_symbol<_lupine_fn_t>("nvmlDeviceGetTotalEccErrors");
1104-
nvmlReturn_t _lupine_result =
1105-
_lupine_fn == nullptr
1106-
? function_not_found()
1107-
: _lupine_fn(_lupine_device, _lupine_first, _lupine_second,
1108-
&_lupine_value);
1109-
1100+
unsigned long long _lupine_value = 0;
1101+
nvmlReturn_t _lupine_result = NVML_SUCCESS;
11101102
if (rpc_write_start_response(conn, _lupine_request_id) < 0 ||
11111103
rpc_write(conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
11121104
rpc_write(conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
@@ -1130,16 +1122,8 @@ int handle_nvmlDeviceGetDetailedEccErrors(conn_t *conn) {
11301122
return -1;
11311123
}
11321124

1133-
nvmlEccErrorCounts_t _lupine_value = {};
1134-
using _lupine_fn_t =
1135-
nvmlReturn_t (*)(nvmlDevice_t, nvmlMemoryErrorType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
1136-
_lupine_fn_t _lupine_fn = nvml_symbol<_lupine_fn_t>("nvmlDeviceGetDetailedEccErrors");
1137-
nvmlReturn_t _lupine_result =
1138-
_lupine_fn == nullptr
1139-
? function_not_found()
1140-
: _lupine_fn(_lupine_device, _lupine_first, _lupine_second,
1141-
&_lupine_value);
1142-
1125+
nvmlEccErrorCounts_t _lupine_value = nvmlEccErrorCounts_t{};
1126+
nvmlReturn_t _lupine_result = NVML_SUCCESS;
11431127
if (rpc_write_start_response(conn, _lupine_request_id) < 0 ||
11441128
rpc_write(conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
11451129
rpc_write(conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||
@@ -1165,16 +1149,8 @@ int handle_nvmlDeviceGetMemoryErrorCounter(conn_t *conn) {
11651149
return -1;
11661150
}
11671151

1168-
unsigned long long _lupine_value = {};
1169-
using _lupine_fn_t = nvmlReturn_t (*)(nvmlDevice_t, nvmlMemoryErrorType_t, nvmlEccCounterType_t,
1170-
nvmlMemoryLocation_t, unsigned long long *);
1171-
_lupine_fn_t _lupine_fn = nvml_symbol<_lupine_fn_t>("nvmlDeviceGetMemoryErrorCounter");
1172-
nvmlReturn_t _lupine_result =
1173-
_lupine_fn == nullptr
1174-
? function_not_found()
1175-
: _lupine_fn(_lupine_device, _lupine_first, _lupine_second,
1176-
_lupine_third, &_lupine_value);
1177-
1152+
unsigned long long _lupine_value = 0;
1153+
nvmlReturn_t _lupine_result = NVML_SUCCESS;
11781154
if (rpc_write_start_response(conn, _lupine_request_id) < 0 ||
11791155
rpc_write(conn, &_lupine_value, sizeof(_lupine_value)) < 0 ||
11801156
rpc_write(conn, &_lupine_result, sizeof(_lupine_result)) < 0 ||

0 commit comments

Comments
 (0)