@@ -68,3 +68,34 @@ def test_modernbert_models(
6868 hf_output = torch .tensor (hf_output ).cpu ().float ()
6969 vllm_output = torch .tensor (vllm_output ).cpu ().float ()
7070 assert torch .allclose (hf_output , vllm_output , atol = 1e-2 )
71+
72+
@pytest.mark.parametrize("model", ["bd2lcco/Qwen3-0.6B-finetuned"])
@pytest.mark.parametrize("dtype", ["float"])
@torch.inference_mode
def test_auto_conversion(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
) -> None:
    """Verify vLLM's auto-converted token-classification output against HF.

    The same prompts are run through vLLM's ``token_classify`` API and
    through the HuggingFace ``AutoModelForTokenClassification`` reference
    model; the resulting per-token probability tensors must agree
    element-wise within the given tolerance.
    """
    # vLLM side: classify every example prompt in one call.
    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.token_classify(example_prompts)

    # HF reference: run prompts one at a time and softmax the raw logits.
    with hf_runner(
        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
    ) as hf_model:
        tok = hf_model.tokenizer
        hf_outputs = []
        for text in example_prompts:
            encoded = hf_model.wrap_device(tok([text], return_tensors="pt"))
            logits = hf_model.model(**encoded).logits[0]
            hf_outputs.append(softmax(logits))

    # Compare the two implementations prompt by prompt.
    for ref, got in zip(hf_outputs, vllm_outputs):
        ref = torch.tensor(ref).cpu().float()
        got = torch.tensor(got).cpu().float()
        assert torch.allclose(ref, got, atol=1e-2)
0 commit comments