@@ -3794,7 +3794,7 @@ def run():
37943794
37953795 elif provider == "trtllm-decode" :
37963796 if _load_embedded_trtllm_indexer_topk () is None :
3797- raise RuntimeError ( "TRT-LLM indexerTopK extension unavailable " )
3797+ return float ( "nan" ), float ( "nan" ), float ( "nan " )
37983798 trtllm_out = torch .full ((batch , topk ), - 1 , dtype = torch .int32 , device = x .device )
37993799 trtllm_out_logits_aux = torch .empty ((batch , 10 * topk ), dtype = torch .float32 , device = x .device )
38003800 trtllm_out_indices_aux = torch .empty ((batch , 10 * topk ), dtype = torch .int32 , device = x .device )
@@ -3812,7 +3812,7 @@ def run():
38123812
38133813 elif provider == "trtllm-prefill" :
38143814 if _load_embedded_trtllm_indexer_topk () is None :
3815- raise RuntimeError ( "TRT-LLM indexerTopK extension unavailable " )
3815+ return float ( "nan" ), float ( "nan" ), float ( "nan " )
38163816 trtllm_out = torch .full ((batch , topk ), - 1 , dtype = torch .int32 , device = x .device )
38173817
38183818 def run ():
@@ -3826,7 +3826,7 @@ def run():
38263826
38273827 elif provider == "trtllm-prefill-1024threads" :
38283828 if _load_embedded_trtllm_indexer_topk (prefill_threads = 1024 ) is None :
3829- raise RuntimeError ( "TRT-LLM indexerTopK extension unavailable " )
3829+ return float ( "nan" ), float ( "nan" ), float ( "nan " )
38303830 trtllm_out = torch .full ((batch , topk ), - 1 , dtype = torch .int32 , device = x .device )
38313831
38323832 def run ():
0 commit comments