Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 44 additions & 43 deletions vlmeval/api/__init__.py
100644 → 100755
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't change the file mod.

Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
from .gpt import OpenAIWrapper, GPT4V
from .hf_chat_model import HFChatModel
from .gemini import GeminiWrapper, Gemini
from .qwen_vl_api import QwenVLWrapper, QwenVLAPI, Qwen2VLAPI
from .qwen_api import QwenAPI
from .claude import Claude_Wrapper, Claude3V
from .reka import Reka
from .glm_vision import GLMVisionAPI
from .cloudwalk import CWWrapper
from .sensechat_vision import SenseChatVisionAPI
from .siliconflow import SiliconFlowAPI, TeleMMAPI
from .telemm import TeleMM2_API
from .telemm_thinking import TeleMM2Thinking_API
from .hunyuan import HunyuanVision
from .bailingmm import bailingMMAPI
from .bluelm_api import BlueLMWrapper, BlueLM_API
from .jt_vl_chat import JTVLChatAPI
from .jt_vl_chat_mini import JTVLChatAPI_Mini, JTVLChatAPI_2B
from .video_chat_online_v2 import VideoChatOnlineV2API
from .taiyi import TaiyiAPI
from .lmdeploy import LMDeployAPI
from .arm_thinker import ARM_thinker
from .taichu import TaichuVLAPI, TaichuVLRAPI
from .doubao_vl_api import DoubaoVL
from .mug_u import MUGUAPI
from .kimivl_api import KimiVLAPIWrapper, KimiVLAPI
from .rbdashmm_chat3_api import RBdashMMChat3_API, RBdashChat3_5_API
from .rbdashmm_chat3_5_api import RBdashMMChat3_78B_API, RBdashMMChat3_5_38B_API
from .together import TogetherAPI
from .gcp_vertex import GCPVertexAPI
from .bedrock import BedrockAPI

__all__ = [
'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI', 'Claude3V', 'Claude_Wrapper',
'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision',
'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_API', 'JTVLChatAPI', 'JTVLChatAPI_Mini', 'JTVLChatAPI_2B',
'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', 'ARM_thinker',
'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", 'KimiVLAPIWrapper', 'KimiVLAPI',
'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI',
]
from .gpt import OpenAIWrapper, GPT4V
from .hf_chat_model import HFChatModel
from .gemini import GeminiWrapper, Gemini
from .qwen_vl_api import QwenVLWrapper, QwenVLAPI, Qwen2VLAPI
from .qwen_api import QwenAPI
from .claude import Claude_Wrapper, Claude3V
from .reka import Reka
from .glm_vision import GLMVisionAPI
from .cloudwalk import CWWrapper
from .sensechat_vision import SenseChatVisionAPI
from .siliconflow import SiliconFlowAPI, TeleMMAPI
from .telemm import TeleMM2_API
from .telemm_thinking import TeleMM2Thinking_API
from .hunyuan import HunyuanVision
from .bailingmm import bailingMMAPI
from .bluelm_api import BlueLMWrapper, BlueLM_API
from .jt_vl_chat import JTVLChatAPI
from .jt_vl_chat_mini import JTVLChatAPI_Mini, JTVLChatAPI_2B
from .video_chat_online_v2 import VideoChatOnlineV2API
from .taiyi import TaiyiAPI
from .lmdeploy import LMDeployAPI
from .arm_thinker import ARM_thinker
from .taichu import TaichuVLAPI, TaichuVLRAPI
from .doubao_vl_api import DoubaoVL
from .mug_u import MUGUAPI
from .kimivl_api import KimiVLAPIWrapper, KimiVLAPI
from .rbdashmm_chat3_api import RBdashMMChat3_API, RBdashChat3_5_API
from .rbdashmm_chat3_5_api import RBdashMMChat3_78B_API, RBdashMMChat3_5_38B_API
from .together import TogetherAPI
from .gcp_vertex import GCPVertexAPI
from .bedrock import BedrockAPI
from .video_chat_online_v3 import VideoChatOnlineV3API

__all__ = [
'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI', 'Claude3V', 'Claude_Wrapper',
'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision',
'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_API', 'JTVLChatAPI', 'JTVLChatAPI_Mini', 'JTVLChatAPI_2B',
'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', 'ARM_thinker',
'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", 'KimiVLAPIWrapper', 'KimiVLAPI',
'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI', 'VideoChatOnlineV3API'
]
250 changes: 250 additions & 0 deletions vlmeval/api/base_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
import requests
import json
import base64
from typing import List, Dict, Any
import time


class VLLMClient:
def __init__(self, base_url: str = "http://localhost:9100/v1/chat/completions", app_code: str = 'B0m6Tuglt5shfY7t3GyoJn1V5yVAm0Ba'):
"""
初始化vLLM客户端

Args:
base_url: vLLM server地址
"""
self.base_url = base_url
self.app_code = app_code

def encode_image_to_base64(self, image_path: str) -> str:
"""
将图片编码为base64字符串

Args:
image_path: 图片路径

Returns:
base64编码的图片字符串
"""
with open(image_path, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
return encoded_string

def create_messages_with_images(
self,
prompt: str,
image_paths: List[str],
image_format: str = "base64"
) -> List[Dict]:
"""
创建包含图片的消息

Args:
prompt: 文本提示词
image_paths: 图片路径列表
image_format: 图片格式,支持"base64"或"url"

Returns:
消息列表
"""
messages = [
{
"role": "user",
"content": []
}
]

# 添加文本部分
messages[0]["content"].append({
"type": "text",
"text": prompt
})

# 添加图片部分
for image_path in image_paths:
if image_format == "base64":
# 读取并编码图片
base64_image = self.encode_image_to_base64(image_path)
image_content = {
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
elif image_format == "url":
image_content = {
"type": "image_url",
"image_url": {
"url": image_path
}
}
else:
raise ValueError(f"不支持的图片格式: {image_format}")

messages[0]["content"].append(image_content)

return messages

def stream_completion(
self,
prompt: str = None,
messages: List[Dict] = None,
image_paths: List[str] = None,
model: str = None,
max_tokens: int = None,
temperature: float = 0.7,
top_p: float = None,
stream: bool = True,
**kwargs
):
"""
流式输出请求

Args:
prompt: 文本提示词(如果使用messages参数,则忽略此参数)
messages: 消息列表(支持多模态)
image_paths: 图片路径列表
model: 模型名称
max_tokens: 最大token数
temperature: 温度参数
top_p: top-p采样参数
stream: 是否使用流式输出
**kwargs: 其他参数

Yields:
生成的文本片段
"""
# 构建请求体

request_data = {
"model": model,
"stream": stream,
**kwargs
}
if temperature is not None:
request_data["temperature"] = temperature
if max_tokens is not None:
request_data["max_tokens"] = max_tokens
if max_tokens is not None:
request_data["max_tokens"] = max_tokens
print("request_data:",request_data)
# 处理消息
if messages is not None:
request_data["messages"] = messages
elif image_paths is not None:
# 如果有图片路径,创建包含图片的消息
if prompt is None:
prompt = "请描述图片内容"
request_data["messages"] = self.create_messages_with_images(prompt, image_paths)
elif prompt is not None:
# 纯文本消息
request_data["messages"] = [
{
"role": "user",
"content": prompt
}
]
else:
raise ValueError("必须提供prompt、messages或image_paths中的一个")

# 发送请求
response = requests.post(
self.base_url,
json=request_data,
stream=True,
headers={"Content-Type": "application/json",'Authorization':self.app_code}
)

if response.status_code != 200:
raise Exception(f"请求失败,状态码: {response.status_code}, 响应: {response.text}")

# 处理流式响应
full_response = ""
for line in response.iter_lines():
if line:
line = line.decode('utf-8')

# 跳过SSE事件开始标记
if line.startswith('data: '):
data = line[6:] # 去掉"data: "前缀

# 检查是否为结束标记
if data == '[DONE]':
break

try:
# 解析JSON
json_data = json.loads(data)

# 提取内容
if 'choices' in json_data and len(json_data['choices']) > 0:
delta = json_data['choices'][0].get('delta', {})
content = delta.get('content', '')

if content:
full_response += content
yield content
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}, 原始数据: {data}")

# 返回完整响应
return full_response

def non_stream_completion(
self,
prompt: str = None,
messages: List[Dict] = None,
image_paths: List[str] = None,
model: str = None,
max_tokens: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
**kwargs
) -> Dict[str, Any]:
"""
非流式输出请求

Args:
参数同stream_completion

Returns:
完整的响应
"""
# 构建请求体
request_data = {
"model": model,
"max_tokens": max_tokens,
"temperature": temperature,
"top_p": top_p,
"stream": False,
**kwargs
}

# 处理消息(同流式版本)
if messages is not None:
request_data["messages"] = messages
elif image_paths is not None:
if prompt is None:
prompt = "请描述图片内容"
request_data["messages"] = self.create_messages_with_images(prompt, image_paths)
elif prompt is not None:
request_data["messages"] = [
{
"role": "user",
"content": prompt
}
]
else:
raise ValueError("必须提供prompt、messages或image_paths中的一个")

# 发送请求
response = requests.post(
self.base_url,
json=request_data,
headers={"Content-Type": "application/json"}
)

if response.status_code != 200:
raise Exception(f"请求失败,状态码: {response.status_code}, 响应: {response.text}")

return response.json()
Loading
Loading