forked from modelscope/ms-swift
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: client.py
More file actions
57 lines (48 loc) · 1.64 KB
/
client.py
File metadata and controls
57 lines (48 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from openai import OpenAI
# Pin the server/runtime to GPU 0; must be set before any CUDA context is created.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
def infer(client, model: str, messages):
    """Request one embedding for ``messages`` and print a short preview.

    The chat-completions endpoint is used on purpose: ``client.embeddings.create``
    would also work, but that interface does not support multi-modal media.

    Returns the raw embedding vector (list of floats).
    """
    resp = client.chat.completions.create(model=model, messages=messages)
    embedding = resp.data[0]['embedding']
    dim = len(embedding)
    if dim > 6:
        # Show only the first and last three components of a long vector.
        head = str(embedding[:3])[:-1]
        tail = str(embedding[-3:])[1:]
        preview = f'{head}, ..., {tail}'
    else:
        preview = str(embedding)
    print(f'messages: {messages}')
    print(f'Embedding(shape: [1, {dim}]): {preview}')
    return embedding
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Connect to a local deployment and run one embedding request.

    Picks the first model exposed by the server, builds a text-only message,
    and delegates the actual request to ``infer``.
    """
    base_url = f'http://{host}:{port}/v1'
    client = OpenAI(
        api_key='EMPTY',
        base_url=base_url,
    )
    model = client.models.list().data[0].id
    print(f'model: {model}')
    content = [
        # Multi-modal input is supported as well, e.g.:
        # {
        #     'type': 'image',
        #     'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
        # },
        {
            'type': 'text',
            'text': 'What is the capital of China?'
        },
    ]
    messages = [{'role': 'user', 'content': content}]
    infer(client, model, messages)
if __name__ == '__main__':
    from swift.llm import DeployArguments, run_deploy

    # Deploy an embedding server locally, query it once, then shut it down
    # when the context manager exits.
    args = DeployArguments(
        model='Qwen/Qwen3-Embedding-0.6B',  # GME/GTE models or your checkpoints are also supported
        task_type='embedding',
        infer_backend='vllm',
        verbose=False,
        log_interval=-1)
    with run_deploy(args) as port:
        run_client(port=port)