66from services .providers .base import AbstractModelProvider , _classify_provider_error
77
88
# Substrings looked up in a model id to recognise image *generation* models.
DASHSCOPE_IMAGE_GENERATION_KEYWORDS = (
    "image", "wanx", "aitryon", "tryon", "flux", "stable-diffusion", "sdxl",
)

# Substrings that mark a model as image *understanding*. They are matched
# against the model id alone and against the id + description text.
DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS = (
    # Named Qwen VL families.
    "qwen-vl", "qwen2-vl", "qwen2.5-vl", "qwen3-vl", "qwen3.5-vl", "qwen3.6-vl",
    # Generic vision markers.
    "-vl", "vl-", "vision", "visual", "ocr",
    # Qwen 3.6 ids are matched as a whole family, with or without the dash.
    "qwen3.6", "qwen-3.6",
)

# Substrings searched in the model id + description for video understanding.
DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS = (
    "omni",
    "video-understanding",
    "video-ocr",
)
34+
35+
36+ def _modality_set (value ) -> set :
37+ if not value :
38+ return set ()
39+ if isinstance (value , str ):
40+ return {value .lower ()}
41+ return {str (item ).lower () for item in value }
42+
43+
44+ def _has_keyword (text : str , keywords : tuple ) -> bool :
45+ return any (keyword in text for keyword in keywords )
46+
47+
def _is_dashscope_explicit_image_understanding_model(model_id: str) -> bool:
    """Tell whether the model id alone marks an image-understanding model.

    Only the id is inspected; the match is a substring lookup against
    ``DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS``.
    """
    understanding_markers = DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS
    return _has_keyword(model_id, understanding_markers)
50+
51+
def _is_dashscope_image_generation_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool:
    """Decide whether a model should be listed as an image generator.

    Args:
        model_id: Model identifier; the only text that is keyword-matched.
        desc: Model description (not consulted here; kept so all classifier
            helpers share one signature).
        req_mods: Normalised request modalities (not consulted here).
        res_mods: Normalised response modalities.

    Returns:
        False whenever the id itself names an image-understanding model.
        Otherwise True if the model responds with ``"image"`` or its id
        contains one of ``DASHSCOPE_IMAGE_GENERATION_KEYWORDS``.
    """
    named_as_understanding = _is_dashscope_explicit_image_understanding_model(model_id)
    if not named_as_understanding:
        if "image" in res_mods:
            return True
        return _has_keyword(model_id, DASHSCOPE_IMAGE_GENERATION_KEYWORDS)
    # An explicit understanding id wins over any generation signal.
    return False
56+
57+
58+ def _is_dashscope_video_understanding_model (model_id : str , desc : str , req_mods : set , res_mods : set ) -> bool :
59+ searchable_text = f"{ model_id } { desc .lower ()} "
60+ if "video" in req_mods and "text" in res_mods :
61+ return True
62+ return _has_keyword (searchable_text , DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS )
63+
64+
def _is_dashscope_image_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool:
    """Decide whether a model should be listed as an image-understanding model.

    Image-generation and video-understanding models are excluded first, so
    those two classifications take priority over this one.

    Args:
        model_id: Model identifier.
        desc: Model description; may be empty or ``None``.
        req_mods: Normalised (lowercase) request modalities.
        res_mods: Normalised (lowercase) response modalities.

    Returns:
        True when the model accepts ``"image"`` and responds with ``"text"``,
        or when the id/description text contains one of
        ``DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS``; False otherwise.
    """
    # Normalise once so the helpers below never see a None description.
    desc = desc or ""
    if _is_dashscope_image_generation_model(model_id, desc, req_mods, res_mods):
        return False
    if _is_dashscope_video_understanding_model(model_id, desc, req_mods, res_mods):
        return False
    # Video-in/text-out was already claimed by the video classifier above,
    # so only the image request modality can still qualify here.
    if "image" in req_mods and "text" in res_mods:
        return True
    # The id is part of the searched text, so one lookup covers both the
    # id-only match and the description match. Lowercase the id too, because
    # every keyword is lowercase.
    searchable_text = f"{model_id or ''} {desc}".lower()
    return _has_keyword(searchable_text, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS)
76+
77+
978class DashScopeModelProvider (AbstractModelProvider ):
1079 """Concrete implementation for DashScope (Aliyun) provider."""
1180
@@ -57,6 +126,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
57126 categorized_models = {
58127 "chat" : [], # Maps to "llm"
59128 "vlm" : [], # Maps to "vlm"
129+ "vlm2" : [], # Maps to image generation models
130+ "vlm3" : [], # Maps to video understanding models
60131 "embedding" : [], # Maps to "embedding" / "multi_embedding"
61132 "rerank" : [], # Maps to "rerank"
62133 "tts" : [], # Maps to "tts"
@@ -71,6 +142,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
71142 metadata = model_obj .get ('inference_metadata' ) or {}
72143 req_mod = metadata .get ('request_modality' , [])
73144 res_mod = metadata .get ('response_modality' , [])
145+ req_mods = _modality_set (req_mod )
146+ res_mods = _modality_set (res_mod )
74147 model_obj .setdefault ("object" , model_obj .get ("object" , "model" ))
75148 model_obj .setdefault ("owned_by" , model_obj .get ("owned_by" , "dashscope" ))
76149 cleaned_model = {
@@ -107,8 +180,17 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
107180 continue
108181
109182 # 5. VLM
110- vision_mods = {'Image' , 'Video' }
111- if (set (req_mod ) & vision_mods ) or (set (res_mod ) & vision_mods ) or '视觉' in desc :
183+ if _is_dashscope_video_understanding_model (m_id , desc , req_mods , res_mods ):
184+ cleaned_model .update ({"model_tag" : "chat" , "model_type" : "vlm3" })
185+ categorized_models ['vlm3' ].append (cleaned_model )
186+ continue
187+
188+ if _is_dashscope_image_generation_model (m_id , desc , req_mods , res_mods ):
189+ cleaned_model .update ({"model_tag" : "chat" , "model_type" : "vlm2" })
190+ categorized_models ['vlm2' ].append (cleaned_model )
191+ continue
192+
193+ if _is_dashscope_image_understanding_model (m_id , desc , req_mods , res_mods ):
112194 cleaned_model .update ({"model_tag" : "chat" , "model_type" : "vlm" })
113195 categorized_models ['vlm' ].append (cleaned_model )
114196 continue
@@ -124,7 +206,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
124206 elif target_model_type in ("embedding" , "multi_embedding" ):
125207 return categorized_models ["embedding" ]
126208 elif target_model_type in categorized_models :
127- return categorized_models [target_model_type ]
209+ return [
210+ {** model , "model_type" : target_model_type }
211+ for model in categorized_models [target_model_type ]
212+ ]
128213 else :
129214 return []
130215 except (httpx .HTTPStatusError , httpx .ConnectTimeout , httpx .ConnectError , Exception ) as e :
0 commit comments