Skip to content

Commit af7f770

Browse files
authored
add ctx_error_type to ctx health data structure (Xilinx#9641)
Signed-off-by: Sri Latha Ayyannagari <SriLatha.Ayyannagari@amd.com>
1 parent f8e923a commit af7f770

2 files changed

Lines changed: 11 additions & 1 deletion

File tree

src/runtime_src/core/common/api/xrt_kernel.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4603,6 +4603,7 @@ aie_error_message_v1(const ert_packet* epkt, const std::string& msg)
46034603
} else if ( ctx_health->npu_gen == NPU_GEN_AIE4) {
46044604
oss << std::uppercase << std::hex << std::setfill('0');
46054605
oss << "ctx_state = 0x" << std::setw(indent8) << ctx_health->aie4.ctx_state
4606+
<< "\nctx_error_type = 0x" << std::setw(indent8) << ctx_health->aie4.ctx_error_type
46064607
<< "\nnumber of uC reported = "<<std::dec << ctx_health->aie4.num_uc;
46074608
for (uint32_t i = 0; i < ctx_health->aie4.num_uc; ++i) {
46084609
oss << "\nuc_info[" << i << "]: "

src/runtime_src/core/include/xrt/detail/ert.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,15 @@ struct ert_uc_health_info {
697697
* which has context health data for aie2ps and aie4
698698
*
699699
* @ctx_state: context state
700-
* @num_ucs: number of uC reported
700+
* @num_uc: number of uC reported
701+
* @ctx_error_type: context error type. Error types:
702+
* NPU_ASYNC_EVENT_CTX_ERR_HWSCH_FAILURE: HWS error moving a context from one state to another
703+
* NPU_ASYNC_EVENT_CTX_ERR_STOP_FAILURE: HWS error stopping a context
704+
* NPU_ASYNC_EVENT_CTX_ERR_AIE_FAILURE: NPI error interrupt
705+
* NPU_ASYNC_EVENT_CTX_ERR_PREEMPTION_TIMEOUT: preemption took too long
706+
NPU_ASYNC_EVENT_CTX_ERR_NEW_PROCESS_FAILURE: HWS error - unable to create a process for a context
707+
* NPU_ASYNC_EVENT_CTX_ERR_UC_CRITICAL_ERROR: CERT critical error interrupt
708+
* NPU_ASYNC_EVENT_CTX_ERR_UC_COMPLETION_TIMEOUT: Context TDR - CERT is stuck / control code hang
701709
* @uc_info: array for health data for each uC in the context.
702710
the array size is based on num_uc.
703711
*
@@ -707,6 +715,7 @@ struct ert_uc_health_info {
707715
/* Context health data for aie4: fixed header fields followed by per-uC entries. */
struct ert_ctx_health_data_aie4 {
708716
uint32_t ctx_state; /* context state */
709717
uint32_t num_uc; /* number of uC reported */
718+
uint32_t ctx_error_type; /* context error type (one of the NPU_ASYNC_EVENT_CTX_ERR_* values) */
710719
struct ert_uc_health_info uc_info[]; /* health data for each uC in the context */
711720
};
712721

0 commit comments

Comments
 (0)