Skip to content

Commit 00c84f4

Browse files
authored
feat(mcp): add new qwen-image-edit and wan2.6 generations(#53)
Implement QwenImageEdit component for image editing. This component allows users to edit images using prompts and returns the edited image URLs. Refactor QwenImageEdit for batch image processing: refactor the Qwen image editing component to support batch processing of multiple images, update the input and output models to handle lists of image URLs, and adjust method implementations accordingly. Add QwenImageEditNew to components initialization. Update qwen_image_edit_new.py (wan2.6 and qwen_image_edit_new).
1 parent 3a7b035 commit 00c84f4

6 files changed

Lines changed: 1131 additions & 1 deletion

File tree

src/agentscope_bricks/components/__init__.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@
6060
SpeechToVideoSubmit,
6161
SpeechToVideoFetch,
6262
)
63+
from agentscope_bricks.components.generations.async_text_to_video_wan26 import ( # noqa
64+
TextToVideoWan26Submit,
65+
)
66+
from agentscope_bricks.components.generations.async_image_to_video_wan26 import ( # noqa
67+
ImageToVideoWan26Submit,
68+
)
69+
from agentscope_bricks.components.generations.image_generation_wan26 import ( # noqa
70+
ImageGenerationWan26,
71+
)
72+
from agentscope_bricks.components.generations.fetch_wan import WanVideoFetch
73+
from agentscope_bricks.components.generations.qwen_image_edit_new import (
74+
QwenImageEditNew,
75+
) # noqa
6376

6477

6578
class McpServerMeta(BaseModel):
@@ -102,7 +115,11 @@ class McpServerMeta(BaseModel):
102115
),
103116
"modelstudio_qwen_image": McpServerMeta(
104117
instructions="基于通义千问大模型的智能图像生成服务,提供高质量的图像处理和编辑功能",
105-
components=[QwenImageGen, QwenImageEdit],
118+
components=[
119+
QwenImageGen,
120+
QwenImageEdit,
121+
QwenImageEditNew,
122+
], # ← 新增 QwenImageEditNew
106123
),
107124
"modelstudio_web_search": McpServerMeta(
108125
instructions="提供实时互联网搜索服务,提供准确及时的信息检索功能",
@@ -116,4 +133,13 @@ class McpServerMeta(BaseModel):
116133
instructions="基于通义千问大模型的语音合成服务,支持多种语言语音合成功能",
117134
components=[QwenTextToSpeech],
118135
),
136+
"modelstudio_wan26_media": McpServerMeta(
137+
instructions="基于通义万相大模型2.6版本提供的图像和视频生成服务",
138+
components=[
139+
ImageGenerationWan26,
140+
TextToVideoWan26Submit,
141+
ImageToVideoWan26Submit,
142+
WanVideoFetch,
143+
],
144+
),
119145
}
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
# -*- coding: utf-8 -*-
2+
import os
3+
import uuid
4+
from http import HTTPStatus
5+
from typing import Any, Optional
6+
7+
from dashscope.aigc.video_synthesis import AioVideoSynthesis
8+
from mcp.server.fastmcp import Context
9+
from pydantic import BaseModel, Field
10+
11+
from agentscope_bricks.base.component import Component
12+
from agentscope_bricks.utils.tracing_utils.wrapper import trace
13+
from agentscope_bricks.utils.api_key_util import ApiNames, get_api_key
14+
from agentscope_bricks.utils.tracing_utils import TracingUtil
15+
16+
17+
class ImageToVideoWan26SubmitInput(BaseModel):
    """
    Input model for submitting an image-to-video task using wan2.6-i2v.
    """

    # NOTE: only ``image_url`` is required. Every other field defaults to
    # None; the "default" values quoted in the descriptions below are the
    # ones stated by the description text itself, not defaults set by this
    # model.

    # Input image: a public URL, Base64-encoded data, or a local file path.
    image_url: str = Field(
        ...,
        description="输入图像,支持公网URL、Base64编码或本地文件路径",
    )
    # Positive prompt describing the motion/changes wanted in the video.
    prompt: Optional[str] = Field(
        default=None,
        description="正向提示词,描述希望视频中发生的动作或变化,例如“镜头缓慢推进,风吹动树叶”。",
    )
    # Negative prompt listing content to exclude (blur, flicker, ...).
    negative_prompt: Optional[str] = Field(
        default=None,
        description="反向提示词,用于排除不希望出现的内容,例如“模糊、闪烁、变形、水印”。",
    )
    # Public URL of a custom audio file; takes precedence over ``audio``.
    audio_url: Optional[str] = Field(
        default=None,
        description="自定义音频文件的公网URL。参数优先级:audio_url > audio。",
    )
    # Whether to auto-generate audio; only applies without ``audio_url``.
    audio: Optional[bool] = Field(
        default=None,
        description="是否自动生成配音。仅在 audio_url 未提供时生效。",
    )
    # Video effect template name, e.g. squish, flying, carousel.
    template: Optional[str] = Field(
        default=None,
        description="视频特效模板,如:squish(解压捏捏)、flying(魔法悬浮)、carousel(时光木马)等。",
    )
    # Video resolution: '720P' or '1080P' (description states '1080P' as
    # the default).
    resolution: Optional[str] = Field(
        default=None,
        description="视频分辨率,可选值:'720P'、'1080P'。默认为 '1080P'。",
    )
    # Video length in seconds: 5, 10 or 15 (description states 5 as the
    # default).
    duration: Optional[int] = Field(
        default=None,
        description="视频时长(秒),可选值:5、10、15。默认为 5。",
    )
    # Intelligent prompt rewriting; per the description it is on by default
    # and is required for ``shot_type`` to take effect.
    prompt_extend: Optional[bool] = Field(
        default=None,
        description=" Prompt 智能改写。开启后可提升生成效果,并使 shot_type 生效,"
        "默认值为 true:开启智能改写。false:不开启智能改写。",
    )
    # Shot type: 'single' (default per description) or 'multi'; only
    # effective when prompt_extend is true and overrides the prompt text.
    shot_type: Optional[str] = Field(
        default=None,
        description="镜头类型,仅在 prompt_extend=true 时生效。"
        "可选值:'single'(单镜头,默认)、'multi'(多镜头切换)。"
        "参数优先级高于 prompt 中的描述。",
    )
    # Whether to add a watermark (e.g. an "AI generated" mark); off by
    # default per the description.
    watermark: Optional[bool] = Field(
        default=None,
        description="是否在视频中添加水印(如“AI生成”标识)。默认不添加。",
    )
    # Random seed for reproducible results.
    seed: Optional[int] = Field(
        default=None,
        description="随机种子,用于结果复现。",
    )
    # MCP request context (HTTP headers); not meant to be generated by the
    # model calling the tool.
    ctx: Optional[Context] = Field(
        default=None,
        description="HTTP request context containing headers for mcp only, "
        "don't generate it",
    )
78+
79+
80+
class ImageToVideoWan26SubmitOutput(BaseModel):
    """
    Output of the image-to-video task submission.
    """

    # Unique identifier of the asynchronous task.
    task_id: str = Field(
        title="Task ID",
        description="异步任务的唯一标识符。",
    )
    # Task state as listed in the description: PENDING (queued), RUNNING,
    # SUCCEEDED, FAILED, CANCELED, or UNKNOWN (task missing / state
    # unknown).
    task_status: str = Field(
        title="Task Status",
        description="视频生成的任务状态,PENDING:任务排队中,RUNNING:任务处理中,SUCCEEDED:任务执行成功,"
        "FAILED:任务执行失败,CANCELED:任务取消成功,UNKNOWN:任务不存在或状态未知",
    )
    # Unique id of this request, usable for log tracing.
    request_id: Optional[str] = Field(
        default=None,
        title="Request ID",
        description="本次请求的唯一ID,可用于日志追踪。",
    )
99+
100+
101+
class ImageToVideoWan26Submit(
    Component[ImageToVideoWan26SubmitInput, ImageToVideoWan26SubmitOutput],
):
    """
    Submit an image-to-video generation task using the wan2.6-i2v model.
    """

    name: str = "modelstudio_image_to_video_wan26_submit_task"
    description: str = (
        "[版本: wan2.6] 通义万相图生视频模型(wan2.6-i2v)异步任务提交工具。基于单张首帧图像和文本提示,生成一段流畅的有声视频。\n"  # noqa
        "支持视频时长:5秒、10秒或15秒;分辨率:720P、1080P;支持自动配音或传入自定义音频,实现音画同步。\n"
        "独家支持多镜头叙事:可生成包含多个镜头的视频,并在镜头切换时保持主体一致性。\n"
        "提供特效模板(如“魔法悬浮”、“气球膨胀”),适用于创意视频制作、娱乐特效展示等场景。\n"
    )

    @trace(trace_type="AIGC", trace_name="image_to_video_wan26_submit")
    async def arun(
        self,
        args: ImageToVideoWan26SubmitInput,
        **kwargs: Any,
    ) -> ImageToVideoWan26SubmitOutput:
        """Submit an asynchronous image-to-video task.

        Args:
            args: Source image, prompts and optional generation settings.
            **kwargs: ``trace_event`` is popped and used for logging;
                ``model_name`` overrides the model (otherwise the
                ``IMAGE_TO_VIDEO_MODEL_NAME`` environment variable, then
                ``"wan2.6-i2v"``). The remaining keyword arguments are
                forwarded to ``get_api_key``.

        Returns:
            The task id, the task status reported by the service and a
            request id.

        Raises:
            ValueError: If ``get_api_key`` raises ``AssertionError``.
            RuntimeError: If the response status is not OK, has no output,
                or reports the task as FAILED or CANCELED.
        """
        trace_event = kwargs.pop("trace_event", None)
        request_id = TracingUtil.get_request_id()

        try:
            api_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            # Chain the cause so the original failure stays visible in the
            # traceback (same handling as the fetch component).
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        model_name = kwargs.get(
            "model_name",
            os.getenv("IMAGE_TO_VIDEO_MODEL_NAME", "wan2.6-i2v"),
        )

        # Optional generation parameters: only values the caller actually
        # supplied are sent. ``resolution`` and ``shot_type`` are strings,
        # so an empty string is treated as "not supplied".
        candidates = {
            "audio": args.audio,
            "resolution": args.resolution or None,
            "duration": args.duration,
            "prompt_extend": args.prompt_extend,
            "shot_type": args.shot_type or None,
            "watermark": args.watermark,
            "seed": args.seed,
        }
        parameters = {
            key: value
            for key, value in candidates.items()
            if value is not None
        }

        aio_video_synthesis = AioVideoSynthesis()

        response = await aio_video_synthesis.async_call(
            model=model_name,
            api_key=api_key,
            img_url=args.image_url,
            prompt=args.prompt,
            negative_prompt=args.negative_prompt,
            audio_url=args.audio_url,
            template=args.template,
            **parameters,
        )

        if trace_event:
            trace_event.on_log(
                "",
                **{
                    "step_suffix": "results",
                    "payload": {
                        "request_id": request_id,
                        "submit_task": response,
                    },
                },
            )

        if (
            response.status_code != HTTPStatus.OK
            or not response.output
            or response.output.task_status in ["FAILED", "CANCELED"]
        ):
            raise RuntimeError(
                f"Failed to submit image-to-video task: {response}",
            )

        # Prefer the tracing request id, then the one returned by the
        # service, and finally a freshly generated UUID.
        request_id = request_id or response.request_id or str(uuid.uuid4())

        return ImageToVideoWan26SubmitOutput(
            request_id=request_id,
            task_id=response.output.task_id,
            task_status=response.output.task_status,
        )
198+
199+
200+
# ========== Fetch section unchanged (descriptions slightly adjusted only) ==========
201+
202+
203+
class ImageToVideoWan26FetchInput(BaseModel):  # noqa
    # Input model for querying the result of an image-to-video task.
    # Documented with comments instead of a docstring: a model docstring
    # would presumably surface as the schema description - TODO confirm.

    # Id of the video generation task to query.
    task_id: str = Field(
        title="Task ID",
        description="要查询的视频生成任务ID。",
    )
    # MCP request context (HTTP headers); not meant to be generated by the
    # model calling the tool.
    ctx: Optional[Context] = Field(
        default=None,
        description="HTTP request context containing headers for mcp only, "
        "don't generate it",
    )
213+
214+
215+
class ImageToVideoWan26FetchOutput(BaseModel):
    # Output model describing the fetched state of an image-to-video task.
    # Documented with comments instead of a docstring: a model docstring
    # would presumably surface as the schema description - TODO confirm.

    # Publicly accessible URL of the generated video (MP4 per description).
    video_url: str = Field(
        title="Video URL",
        description="生成视频的公网可访问URL(MP4格式)。",
    )
    # Task id; per the description it matches the id given as input.
    task_id: str = Field(
        title="Task ID",
        description="任务ID,与输入一致。",
    )
    # Task status; SUCCEEDED on success per the description.
    task_status: str = Field(
        title="Task Status",
        description="任务最终状态,成功时为 SUCCEEDED。",
    )
    # Request id used for tracing.
    request_id: Optional[str] = Field(
        default=None,
        title="Request ID",
        description="请求ID,用于追踪。",
    )
233+
234+
235+
class ImageToVideoWan26Fetch(
    Component[ImageToVideoWan26FetchInput, ImageToVideoWan26FetchOutput],
):
    # Component that looks up a previously submitted wan2.6-i2v task by id
    # and reports its video URL and status.
    name: str = "modelstudio_image_to_video_wan26_fetch_result"
    description: str = (
        "查询通义万相 wan2.6-i2v 图生视频任务的结果。"
        "输入 Task ID,返回生成的视频 URL 及任务状态。"
        "请在提交任务后轮询此接口,直到任务状态变为 SUCCEEDED。"
    )

    @trace(trace_type="AIGC", trace_name="image_to_video_wan26_fetch")
    async def arun(
        self,
        args: ImageToVideoWan26FetchInput,
        **kwargs: Any,
    ) -> ImageToVideoWan26FetchOutput:
        """Fetch the current result of an image-to-video task.

        Args:
            args: Carries the id of the task to look up.
            **kwargs: ``trace_event`` is popped and used for logging; the
                remaining keyword arguments are forwarded to
                ``get_api_key``.

        Returns:
            The video URL, task id and task status taken from the service
            response, plus a request id.

        Raises:
            ValueError: If ``get_api_key`` raises ``AssertionError``.
            RuntimeError: If the response status is not OK, has no output,
                or reports the task as FAILED or CANCELED.
        """
        event = kwargs.pop("trace_event", None)
        traced_request_id = TracingUtil.get_request_id()

        try:
            dashscope_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        fetched = await AioVideoSynthesis().fetch(
            api_key=dashscope_key,
            task=args.task_id,
        )

        if event:
            event.on_log(
                "",
                step_suffix="results",
                payload={
                    "request_id": fetched.request_id,
                    "fetch_result": fetched,
                },
            )

        # Healthy means: HTTP OK, an output is present, and the task has
        # not ended in a failed or cancelled state.
        terminal_failures = ("FAILED", "CANCELED")
        healthy = (
            fetched.status_code == HTTPStatus.OK
            and fetched.output
            and fetched.output.task_status not in terminal_failures
        )
        if not healthy:
            raise RuntimeError(
                f"Failed to fetch image-to-video result: {fetched}",
            )

        # Request id precedence: service response, tracing context, then a
        # freshly generated UUID.
        resolved_request_id = fetched.request_id
        if not resolved_request_id:
            resolved_request_id = traced_request_id or str(uuid.uuid4())

        task_output = fetched.output
        return ImageToVideoWan26FetchOutput(
            video_url=task_output.video_url,
            task_id=task_output.task_id,
            task_status=task_output.task_status,
            request_id=resolved_request_id,
        )

0 commit comments

Comments
 (0)