# See the License for the specific language governing permissions and
# limitations under the License.

# Environment-flag accessor for the RBLN plugin; must be the absolute package
# path so the module resolves regardless of the importer's working directory.
import vllm_rbln.rbln_envs as envs


1818def register ():
@@ -23,42 +23,41 @@ def register():
def register_model():
    """Register RBLN Optimum model implementations with vLLM's ModelRegistry.

    Does nothing when the VLLM_RBLN_USE_VLLM_MODEL environment flag is set,
    in which case stock vLLM model implementations are used instead (the
    companion register_ops() handles that mode).
    """
    if not envs.VLLM_RBLN_USE_VLLM_MODEL:
        # Imported lazily so importing this module alone does not pull in vllm.
        from vllm import ModelRegistry

        # Architecture name -> "module:class" path of the RBLN replacement.
        # Both T5 architecture aliases resolve to the same class.
        model_paths = {
            "T5WithLMHeadModel":
            "vllm_rbln.model_executor.models.optimum.t5:RBLNT5ForConditionalGeneration",
            "T5ForConditionalGeneration":
            "vllm_rbln.model_executor.models.optimum.t5:RBLNT5ForConditionalGeneration",
            "T5EncoderModel":
            "vllm_rbln.model_executor.models.optimum.encoder:RBLNOptimumForEncoderModel",
            "Gemma3ForConditionalGeneration":
            "vllm_rbln.model_executor.models.optimum.gemma3:RBLNOptimumGemma3ForConditionalGeneration",
        }
        for arch, path in model_paths.items():
            ModelRegistry.register_model(arch, path)
4342
4443
def register_ops():
    """Import RBLN op/layer patch modules for their registration side effects.

    Only active when the VLLM_RBLN_USE_VLLM_MODEL flag is set; each import
    below patches or extends a vLLM component with the RBLN-specific
    implementation (imports are intentionally unused by name, hence # noqa).
    Import paths are absolute (vllm_rbln.*) so they resolve when this module
    is loaded as an installed plugin.
    """
    if envs.VLLM_RBLN_USE_VLLM_MODEL:
        import vllm_rbln.attention.layer  # noqa
        import vllm_rbln.forward_context  # noqa
        import vllm_rbln.lora.layer  # noqa
        import vllm_rbln.model_executor.layers.fused_moe.layer  # noqa
        import vllm_rbln.model_executor.layers.logits_processor  # noqa
        import vllm_rbln.model_executor.layers.quantization.kernels.mixed_precision  # noqa
        import vllm_rbln.model_executor.layers.rotary_embedding.base  # noqa
        import vllm_rbln.model_executor.layers.rotary_embedding.deepseek_scaling_rope  # noqa
        import vllm_rbln.model_executor.layers.vocab_parallel_embedding  # noqa
        import vllm_rbln.model_executor.model_loader.weight_loader  # noqa
        import vllm_rbln.models.deepseek_v2  # noqa
        import vllm_rbln.models.qwen2_moe  # noqa
        import vllm_rbln.models.qwen3  # noqa
        import vllm_rbln.models.qwen3_moe  # noqa
        import vllm_rbln.models.utils  # noqa
        # Triton kernels are registered via from-imports of their submodules.
        from vllm_rbln.triton_kernels import attention  # noqa
        from vllm_rbln.triton_kernels import causal_attention  # noqa
        from vllm_rbln.triton_kernels import sliding_window_attention  # noqa