11"""vllm kunlun init"""
2- from .platforms import current_platform
3- import sys
4- import importlib
5- import warnings
2+
63import builtins
7- import os
8- import time
9- import vllm .envs as envs
4+ import importlib
5+ import logging
6+ import sys
7+
logger = logging.getLogger(__name__)

# The import function in place when this module is loaded; the custom hook
# delegates every import to it.
OLD_IMPORT_HOOK = builtins.__import__

# One-shot guards: each flips to True once its patch has been applied
# successfully, so the patch is not applied again on later imports.
_kv_admission_patched = False
_kv_scheduler_patched = False
13+
14+
1115def _custom_import (module_name , globals = None , locals = None , fromlist = (), level = 0 ):
16+ global _kv_admission_patched , _kv_scheduler_patched
1217 try :
1318 module_mappings = {
1419 "vllm.compilation.wrapper" : "vllm_kunlun.compilation.wrapper" ,
1520 "vllm.v1.worker.utils" : "vllm_kunlun.v1.worker.utils" ,
16- "vllm.model_executor.model_loader.bitsandbytes_loader" : "vllm_kunlun.models.model_loader.bitsandbytes_loader" ,
1721 "vllm.v1.sample.ops.topk_topp_sampler" : "vllm_kunlun.v1.sample.ops.topk_topp_sampler" ,
22+ "vllm.model_executor.model_loader.bitsandbytes_loader" : "vllm_kunlun.models.model_loader.bitsandbytes_loader" ,
1823 "vllm.model_executor.layers.sampler" : "vllm_kunlun.ops.sample.sampler" ,
19- "vllm.v1.sample.ops.topk_topp_sampler" : "vllm_kunlun.v1.sample.ops.topk_topp_sampler" ,
2024 "vllm.v1.sample.rejection_sampler" : "vllm_kunlun.v1.sample.rejection_sampler" ,
2125 "vllm.attention.ops.merge_attn_states" : "vllm_kunlun.ops.attention.merge_attn_states" ,
22- "vllm.v1.attention.backends.gdn_attn" : "vllm_kunlun.v1.attention.backends.gdn_attn"
26+ "vllm.v1.attention.backends.gdn_attn" : "vllm_kunlun.v1.attention.backends.gdn_attn" ,
2327 }
2428
2529 if module_name in module_mappings :
@@ -29,48 +33,81 @@ def _custom_import(module_name, globals=None, locals=None, fromlist=(), level=0)
2933 module = importlib .import_module (target_module )
3034 sys .modules [module_name ] = module
3135 sys .modules [target_module ] = module
32- except Exception :
33- pass
34-
35- return OLD_IMPORT_HOOK (
36- module_name ,
37- globals = globals ,
38- locals = locals ,
39- fromlist = fromlist ,
40- level = level
36+ except Exception as e :
37+ logger .warning ("vllm_kunlun: failed to remap module %s: %s" , module_name , e )
38+
39+ result = OLD_IMPORT_HOOK (
40+ module_name , globals = globals , locals = locals , fromlist = fromlist , level = level
4141 )
4242
43+ # Apply KV admission gate patch after kv_cache_manager is fully loaded.
44+ # Deferred to avoid importing vllm internals during early platform registration.
45+ if not _kv_admission_patched and module_name == "vllm.v1.core.kv_cache_manager" :
46+ try :
47+ from vllm_kunlun .patches .kv_admission import apply as _apply_kv
48+
49+ _apply_kv ()
50+ _kv_admission_patched = True
51+ except Exception as e :
52+ logger .warning ("vllm_kunlun: failed to apply KV admission patch: %s" , e )
53+
54+ # Apply partial-prefill concurrency limit patch after scheduler is loaded.
55+ if not _kv_scheduler_patched and module_name == "vllm.v1.core.sched.scheduler" :
56+ try :
57+ from vllm_kunlun .patches .kv_admission import apply_scheduler as _apply_sched
58+
59+ _apply_sched ()
60+ _kv_scheduler_patched = True
61+ except Exception as e :
62+ logger .warning ("vllm_kunlun: failed to apply scheduler patch: %s" , e )
63+
64+ return result
65+
66+
def import_hook():
    """Apply import hook for VLLM Kunlun.

    Rebinds ``builtins.__import__`` to ``_custom_import`` so that every
    subsequent ``import`` statement in the process goes through it.
    """
    builtins.__import__ = _custom_import
4670
71+
def register():
    """Register the Kunlun platform.

    Patches several vLLM modules in place and installs the import hook:

    * replaces ``vllm.transformers_utils.config._CONFIG_REGISTRY`` with
      ``_XPU_CONFIG_REGISTRY``;
    * publishes ``KimiK25Config`` and ``KimiK25VisionConfig`` on
      ``vllm.transformers_utils.configs``;
    * overrides ``ModelConfig.is_deepseek_mla`` with a property built from
      this package's ``is_deepseek_mla``;
    * calls ``import_hook()``.

    Returns:
        The dotted path string
        ``"vllm_kunlun.platforms.kunlun.KunlunPlatform"``.
    """
    # Change for GLM5 and custom model configs.
    import vllm.transformers_utils.config as vllm_config_mod

    from .transformer_utils.config import _XPU_CONFIG_REGISTRY

    # The next two names are unused here; they are imported for the patches
    # their modules apply (per the original "import for patch" note).
    from .utils import redirect_output  # noqa: F401
    from .vllm_utils_wrapper import direct_register_custom_op  # noqa: F401

    vllm_config_mod._CONFIG_REGISTRY = _XPU_CONFIG_REGISTRY

    import vllm.transformers_utils.configs as vllm_configs_mod

    from .transformer_utils.kimi_k25 import KimiK25Config, KimiK25VisionConfig

    vllm_configs_mod.KimiK25Config = KimiK25Config
    vllm_configs_mod.KimiK25VisionConfig = KimiK25VisionConfig

    import vllm.config.model as vllm_model_mod

    from .config.model import is_deepseek_mla

    vllm_model_mod.ModelConfig.is_deepseek_mla = property(is_deepseek_mla)

    import_hook()
    return "vllm_kunlun.platforms.kunlun.KunlunPlatform"
68102
103+
def register_model():
    """Register models for training and inference.

    Delegates to ``register_model`` from the sibling ``models`` package,
    which is imported only when this function is called.
    """
    from .models import register_model as _register_models

    _register_models()
73109
110+
74111def register_tool_parser ():
75112 from .entrypoints .openai .tool_parsers import (
76113 register_tool_parser as _reg_tool_parser ,
0 commit comments