34 changes: 34 additions & 0 deletions docs/en/Quickstart.md
@@ -31,6 +31,8 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j
OPENAI_API_BASE=
# StepAI API
STEPAI_API_KEY=
# AI/ML API
AIML_API_KEY=
# REKA API
REKA_API_KEY=
# GLMV API
@@ -50,6 +52,38 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j
```

- Fill in the blanks with your API keys (if necessary). These API keys will be loaded automatically during inference and evaluation.
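
The automatic loading can be pictured with a minimal sketch; the `load_env` helper below is hypothetical (VLMEvalKit uses its own loader for the `.env` file), but it shows how `KEY=value` lines end up in the process environment:

```python
# Hypothetical sketch of .env loading: non-empty KEY=value lines are
# placed into os.environ; comments and blanks are skipped.
import os

def load_env(text):
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#') or '=' not in line:
            continue
        key, value = line.split('=', 1)
        if value:
            # setdefault: do not override keys already set in the shell
            os.environ.setdefault(key.strip(), value.strip())

load_env('# AI/ML API\nAIML_API_KEY=demo123\n')
print(os.environ['AIML_API_KEY'])  # demo123
```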

## 🌐 Supported Model Provider: AI/ML API Integration

VLMEvalKit now includes first-class support for [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) — a unified provider of 300+ popular models, including:

* 🧠 OpenAI (GPT-4o, GPT-3.5)
* 🐑 Claude 3.5 Series
* 🔹 Gemini 1.5 Pro / Flash
* 📚 DeepSeek, LLaMA3, Mistral and more

**Key Highlights of AI/ML API:**

* ⚡ High rate limits
* 💼 Enterprise-grade uptime
* ♻ Fully OpenAI-compatible interface

📘 [Documentation](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration)

🔗 [Explore Models](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration)

To enable AI/ML API, set your key:

```bash
AIML_API_KEY=your_api_key
```

Then use any supported AIMLAPI model in the `--model` argument. For example:

```bash
python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o
```
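
Under the hood, the wrapper added in this PR posts an OpenAI-compatible chat payload to the AI/ML API endpoint. The sketch below only assembles and serializes such a payload (the model name and image data URL are placeholders); no network call is made:

```python
import json

# Shape of the OpenAI-compatible request body sent to
# https://api.aimlapi.com/v1/chat/completions (values are placeholders).
payload = {
    'model': 'gpt-4o',            # placeholder model name
    'max_tokens': 300,
    'temperature': 0,
    'messages': [{
        'role': 'user',
        'content': [
            {'type': 'text', 'text': 'Please describe this image.'},
            {'type': 'image_url',
             'image_url': {'url': 'data:image/webp;base64,<BASE64>'}},
        ],
    }],
}
body = json.dumps(payload)
roundtrip = json.loads(body)
print(roundtrip['messages'][0]['role'])  # user
```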

## Step 1. Configuration

**VLM Configuration**: All VLMs are configured in `vlmeval/config.py`. A few legacy VLMs (like MiniGPT-4, LLaVA-v1-7B) require additional configuration (setting the code / model weight roots in the config file). During evaluation, use the model name specified in `supported_VLM` in `vlmeval/config.py` to select the VLM. Make sure you can successfully infer with the VLM before starting the evaluation, using the command `vlmutil check {MODEL_NAME}`.
34 changes: 34 additions & 0 deletions docs/zh-CN/Quickstart.md
@@ -30,6 +30,8 @@ pip install -e .
OPENAI_API_BASE=
# StepAI API
STEPAI_API_KEY=
# AI/ML API
AIML_API_KEY=
# REKA API
REKA_API_KEY=
# GLMV API
@@ -49,6 +51,38 @@ pip install -e .
```

- If you need to use an API, fill in your key in the corresponding blank. These API keys will be loaded automatically during inference and evaluation.

## 🌐 Supported Model Provider: AI/ML API Integration

VLMEvalKit now natively supports [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration), a unified platform that aggregates 300+ popular models, including:

* 🧠 OpenAI (GPT-4o, GPT-3.5)
* 🐑 Claude 3.5 Series
* 🔹 Gemini 1.5 Pro / Flash
* 📚 DeepSeek, LLaMA3, Mistral, and more

**Key Highlights of AI/ML API:**

* ⚡ High rate limits
* 💼 Enterprise-grade uptime
* ♻ Fully OpenAI-compatible interface

📘 [Documentation](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration)

🔗 [Explore Models](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration)

To enable AI/ML API, set the environment variable:

```bash
AIML_API_KEY=your_api_key
```

Then use any supported AIMLAPI model in the `--model` argument, for example:

```bash
python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o
```
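
Image inputs reach the API as base64 data URLs embedded in the message payload. A self-contained sketch of that encoding step (the bytes below are fake placeholders for real image data):

```python
import base64

# Encode raw image bytes as a base64 data URL, as the API wrapper does
# before placing the image in an `image_url` content part.
raw = b'\x89PNG-placeholder-bytes'   # stand-in for real image bytes
b64 = base64.b64encode(raw).decode('utf-8')
data_url = f'data:image/png;base64,{b64}'
print(data_url.startswith('data:image/png;base64,'))  # True
```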

## Step 1. Configuration

**VLM Configuration**: All VLMs are configured in `vlmeval/config.py`. Some VLMs (such as MiniGPT-4 and LLaVA-v1-7B) require additional configuration (setting the code / model weight root directories in the config file). During evaluation, use the model name specified in `supported_VLM` in `vlmeval/config.py` to select the VLM. Before starting the evaluation, make sure you can successfully run inference with the VLM, using the command `vlmutil check {MODEL_NAME}`.
3 changes: 2 additions & 1 deletion vlmeval/api/__init__.py
@@ -18,12 +18,13 @@
from .taichu import TaichuVLAPI, TaichuVLRAPI
from .doubao_vl_api import DoubaoVL
from .mug_u import MUGUAPI
from .aimlapi import AIMLAPI

__all__ = [
'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI', 'Claude3V', 'Claude_Wrapper',
'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision',
'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_V_API', 'JTVLChatAPI',
'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI',
'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI"
'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', 'MUGUAPI', 'AIMLAPI',
]
102 changes: 102 additions & 0 deletions vlmeval/api/aimlapi.py
@@ -0,0 +1,102 @@
from vlmeval.smp import *
from vlmeval.api.base import BaseAPI

url = 'https://api.aimlapi.com/v1/chat/completions'
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer {}',
    'HTTP-Referer': 'https://github.com/open-compass/VLMEvalKit',
    'X-Title': 'VLMEvalKit',
}


class AIMLAPI_INT(BaseAPI):

    is_api: bool = True

    def __init__(self,
                 model: str = 'gpt-4-turbo',
                 retry: int = 10,
                 wait: int = 3,
                 key: str = None,
                 temperature: float = 0,
                 max_tokens: int = 300,
                 verbose: bool = True,
                 system_prompt: str = None,
                 **kwargs):
        self.model = model
        self.fail_msg = 'Fail to obtain answer via API.'
        # Copy the module-level headers so each instance holds its own key
        self.headers = dict(headers)
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.system_prompt = system_prompt
        # Fall back to the AIML_API_KEY environment variable if no key is given
        self.key = key or os.environ.get('AIML_API_KEY', '')
        self.headers['Authorization'] = self.headers['Authorization'].format(self.key)

        super().__init__(retry=retry, wait=wait, verbose=verbose, system_prompt=system_prompt, **kwargs)

    @staticmethod
    def build_msgs(msgs_raw):
        # Convert VLMEvalKit-style messages into OpenAI-compatible content parts
        messages = []
        content = []

        for msg in msgs_raw:
            if msg['type'] == 'text':
                content.append({
                    'type': 'text',
                    'text': msg['value'],
                })
            elif msg['type'] == 'image':
                image_b64 = encode_image_file_to_base64(msg['value'])
                content.append({
                    'type': 'image_url',
                    'image_url': {
                        'url': f'data:image/webp;base64,{image_b64}'
                    },
                })

        messages.append({
            'role': 'user',
            'content': content,
        })
        return messages

    def generate_inner(self, inputs, **kwargs) -> tuple:
        payload = dict(
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            messages=self.build_msgs(inputs),
            **kwargs)
        response = requests.post(url, headers=self.headers, data=json.dumps(payload))
        ret_code = response.status_code
        # Collapse all 2xx codes to 0, the success code BaseAPI expects
        ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code

        answer = self.fail_msg
        try:
            resp_struct = json.loads(response.text)
            answer = resp_struct['choices'][0]['message']['content'].strip()
        except Exception as err:
            if self.verbose:
                self.logger.error(f'{type(err)}: {err}')
                self.logger.error(response.text if hasattr(response, 'text') else response)

        return ret_code, answer, response


class AIMLAPI(AIMLAPI_INT):

    def generate(self, message, dataset=None):
        # Skip AIMLAPI_INT in the MRO and dispatch straight to BaseAPI.generate
        return super(AIMLAPI_INT, self).generate(message)


if __name__ == '__main__':
    # export AIML_API_KEY=''
    model = AIMLAPI_INT(verbose=True)
    inputs = [
        {'type': 'image', 'value': '../../assets/apple.jpg'},
        {'type': 'text', 'value': 'Please describe this image in detail.'},
    ]
    code, answer, resp = model.generate_inner(inputs)
    print(code, answer, resp)
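
One detail worth noting in `generate_inner` above: HTTP 2xx statuses are collapsed to `0`, which is the success code that the BaseAPI retry loop checks for. A standalone sketch of that convention:

```python
# Collapse 2xx HTTP statuses to 0 (success); pass other codes through
# unchanged so the retry loop can react to errors such as 429.
def normalize_status(status):
    return 0 if 200 <= int(status) < 300 else status

print(normalize_status(200), normalize_status(204), normalize_status(429))
# 0 0 429
```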