Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion src/agentscope_runtime/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@
from .generations.image_generation_wan25 import (
ImageGenerationWan25,
)
from .generations.image_generation_wan26 import (
ImageGenerationWan26,
)
from .generations.async_image_to_video_wan26 import (
ImageToVideoWan26Submit,
)
from .generations.async_text_to_video_wan26 import (
TextToVideoWan26Submit,
)
from .generations.fetch_wan import (
WanVideoFetch,
)
from .generations.qwen_image_edit_new import (
QwenImageEditNew,
)


class McpServerMeta(BaseModel):
Expand Down Expand Up @@ -102,7 +117,11 @@ class McpServerMeta(BaseModel):
),
"modelstudio_qwen_image": McpServerMeta(
instructions="基于通义千问大模型的智能图像生成服务,提供高质量的图像处理和编辑功能",
components=[QwenImageGen, QwenImageEdit],
components=[
QwenImageGen,
QwenImageEdit,
QwenImageEditNew,
],
),
"modelstudio_web_search": McpServerMeta(
instructions="提供实时互联网搜索服务,提供准确及时的信息检索功能",
Expand All @@ -116,4 +135,42 @@ class McpServerMeta(BaseModel):
instructions="基于通义千问大模型的语音合成服务,支持多种语言语音合成功能",
components=[QwenTextToSpeech],
),
"modelstudio_wan_multimodal": McpServerMeta(
instructions=(
"通义万相(Wan)多模态生成统一服务,支持文本/图像/语音到图像或视频的多种AI生成能力,"
"包括图像生成、编辑、风格迁移、文生视频、图生视频、数字人表演等。"
"当前支持 wanx-style-repaint-v1、wan2.1、wan2.2、wan2.5、wan2.6 模型版本"
"(wan2.1 仅用于基础图像编辑,wanx-style-repaint-v1仅用于人体风格重绘),各版本能力如下:\n"
"- 文本生成图像:wan2.2、wan2.5、wan2.6 均支持,优先使用 wan2.6(画质最优),其次 wan2.5\n"
"- 图像编辑:wan2.1(基础)、wan2.5、wan2.6 支持,优先使用 wan2.6\n"
"- 文本/图像生成视频:wan2.2、wan2.5、wan2.6 均支持,但能力逐代增强:\n"
" - 视频时长:wan2.2 仅支持 5 秒;wan2.5 支持 5 或 10 秒;wan2.6 支持 5、10 或 15 秒\n" # noqa
" - 音频能力:支持自动配音或传入自定义音频实现声画同步(仅 wan2.5 和 wan2.6 支持)\n"
" - 多镜头叙事:可生成包含多个镜头的视频,并在切换时保持主体一致性(仅 wan2.6 支持)\n"
"- 数字人生成(音频驱动人物视频):基于单张人物图像与音频,生成自然说话、唱歌或表演视频;"
"支持肖像、半身或全身画面,不限画幅比例;由 wan2.2 提供基础支持,wan2.5/2.6 支持更高质量与音频同步\n"
"注意:异步视频仅提交生成任务,需配合对应的 Fetch 工具获取结果。\n"
"注意:不同任务对模型版本有严格依赖,请务必结合具体工具描述中的[模型版本]信息进行调用。"
),
components=[
# 基于通义万相大模型的智能图像生成服务,提供高质量的图像处理和编辑功能
ImageGeneration, # wan2.2-t2i 文生图
ImageEdit, # wan2.1-edit 图生图
ImageStyleRepaint, # wan2.2-repaint 图风格迁移
# 基于通义万相大模型提供AI视频生成服务,支持文本到视频、图像到视频和语音到视频的多模态生成功能
TextToVideoSubmit, # wan2.2-t2v 文生视频提交
ImageToVideoSubmit, # wan2.2-i2v 图生视频提交
SpeechToVideoSubmit, # wan2.2-s2v
# 基于通义万相大模型2.5版本提供的图像和视频生成服务
ImageGenerationWan25, # wan2.5 文生图
ImageEditWan25, # wan2.5 图生图
TextToVideoWan25Submit, # wan2.5 文生视频提交
ImageToVideoWan25Submit, # wan2.5 图生视频提交
# 基于通义万相2.6大模型的智能图像生成服务,提供高质量的图像处理和编辑功能
ImageGenerationWan26,  # wan2.6-t2i 文生图
ImageToVideoWan26Submit, # wan2.6-i2v 图生视频提交
TextToVideoWan26Submit, # wan2.6-t2v 文生视频提交
WanVideoFetch, # wan 所有异步视频任务结果查询
],
),
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ class ImageToVideoSubmit(

name: str = "modelstudio_image_to_video_submit_task"
description: str = (
"通义万相-图生视频模型的异步任务提交工具。根据首帧图像和文本提示词,生成时长为5秒的无声视频。"
"同时支持特效模板,可添加“魔法悬浮”、“气球膨胀”等效果,适用于创意视频制作、娱乐特效展示等场景。"
"[版本: wan2.2] 通义万相图生视频模型(wan2.2-i2v-flash)异步任务提交工具。基于单张首帧图像和文本提示,生成一段5秒的无声动态视频。\n" # noqa
"支持分辨率:480P、720P、1080P;不支持音频(无声视频)。\n"
"提供特效模板(如“魔法悬浮”、“气球膨胀”),适用于创意视频制作、娱乐特效展示等场景。\n"
)

@trace(trace_type="AIGC", trace_name="image_to_video_submit")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ class ImageToVideoWan25Submit(

name: str = "modelstudio_image_to_video_wan25_submit_task"
description: str = (
"通义万相-图生视频模型的异步任务提交工具。根据首帧图像和文本提示词,生成时长为5秒的无声视频。"
"同时支持特效模板,可添加“魔法悬浮”、“气球膨胀”等效果,适用于创意视频制作、娱乐特效展示等场景。"
"[版本: wan2.5] 通义万相图生视频模型(wan2.5-i2v)异步提交工具。基于单张首帧图像和文本提示,生成一段流畅的有声视频。\n" # noqa
"支持视频时长:5秒或10秒;分辨率:480P、720P、1080P;支持自动配音或传入自定义音频,实现音画同步。\n"
"提供特效模板(如“魔法悬浮”、“气球膨胀”),适用于创意视频、娱乐特效等场景。\n"
)

@trace(trace_type="AIGC", trace_name="image_to_video_wan25_submit")
Expand Down
290 changes: 290 additions & 0 deletions src/agentscope_runtime/tools/generations/async_image_to_video_wan26.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
# -*- coding: utf-8 -*-
# pylint:disable=abstract-method, deprecated-module, wrong-import-order

import os
import uuid
from http import HTTPStatus
from typing import Any, Optional

from dashscope.aigc.video_synthesis import AioVideoSynthesis
from mcp.server.fastmcp import Context
from pydantic import BaseModel, Field

from ..base import Tool
from ..utils.api_key_util import get_api_key, ApiNames
from ...engine.tracing import trace, TracingUtil


class ImageToVideoWan26SubmitInput(BaseModel):
    """
    Input model for submitting an image-to-video task using wan2.6-i2v.

    Only ``image_url`` is required; every other field is optional and is
    forwarded to the DashScope request only when explicitly set by the
    caller (see the submit tool's parameter-building logic).
    """

    image_url: str = Field(
        ...,
        # Fix: the original description ended in garbled text
        # ("Base64编码径"); restored to a coherent sentence — URL or
        # Base64-encoded image data, per the DashScope i2v input spec.
        description="首帧图像的公网可访问URL,支持 JPG/PNG 格式,也支持传入 Base64 编码的图像数据。",
    )
    prompt: Optional[str] = Field(
        default=None,
        description="正向提示词,描述希望视频中发生的动作或变化,例如“镜头缓慢推进,风吹动树叶”。",
    )
    negative_prompt: Optional[str] = Field(
        default=None,
        description="反向提示词,用于排除不希望出现的内容,例如“模糊、闪烁、变形、水印”。",
    )
    audio_url: Optional[str] = Field(
        default=None,
        description="自定义音频文件的公网URL。参数优先级:audio_url > audio。",
    )
    audio: Optional[bool] = Field(
        default=None,
        description="是否自动生成配音。仅在 audio_url 未提供时生效。",
    )
    template: Optional[str] = Field(
        default=None,
        description="视频特效模板,如:squish(解压捏捏)、flying(魔法悬浮)、carousel(时光木马)等。",
    )
    resolution: Optional[str] = Field(
        default=None,
        description="视频分辨率,可选值:'720P'、'1080P'。默认为 '1080P'。",
    )
    duration: Optional[int] = Field(
        default=None,
        description="视频时长(秒),可选值:5、10、15。默认为 5。",
    )
    prompt_extend: Optional[bool] = Field(
        default=None,
        # Fix: removed the stray leading space in the original description.
        description="Prompt 智能改写。开启后可提升生成效果,并使 shot_type 生效,"
        "默认值为 true:开启智能改写。false:不开启智能改写。",
    )
    shot_type: Optional[str] = Field(
        default=None,
        description="镜头类型,仅在 prompt_extend=true 时生效。"
        "可选值:'single'(单镜头,默认)、'multi'(多镜头切换)。"
        "参数优先级高于 prompt 中的描述。",
    )
    watermark: Optional[bool] = Field(
        default=None,
        description="是否在视频中添加水印(如“AI生成”标识)。默认不添加。",
    )
    seed: Optional[int] = Field(
        default=None,
        description="随机种子,用于结果复现。",
    )
    # MCP-only HTTP request context; never produced by the model.
    ctx: Optional[Context] = Field(
        default=None,
        description="HTTP request context containing headers for mcp only, "
        "don't generate it",
    )


class ImageToVideoWan26SubmitOutput(BaseModel):
    """
    Output of the image-to-video task submission.

    Carries the asynchronous task handle returned by DashScope; pass
    ``task_id`` to the fetch tool to poll for the finished video.
    """

    # Unique identifier of the submitted asynchronous task.
    task_id: str = Field(
        title="Task ID",
        description="异步任务的唯一标识符。",
    )
    # Task lifecycle state as reported by the service
    # (PENDING / RUNNING / SUCCEEDED / FAILED / CANCELED / UNKNOWN).
    task_status: str = Field(
        title="Task Status",
        description="视频生成的任务状态,PENDING:任务排队中,RUNNING:任务处理中,SUCCEEDED:任务执行成功,"
        "FAILED:任务执行失败,CANCELED:任务取消成功,UNKNOWN:任务不存在或状态未知",
    )
    # Request id for log correlation; may fall back to a locally
    # generated UUID when the service does not return one.
    request_id: Optional[str] = Field(
        default=None,
        title="Request ID",
        description="本次请求的唯一ID,可用于日志追踪。",
    )


class ImageToVideoWan26Submit(
    Tool[ImageToVideoWan26SubmitInput, ImageToVideoWan26SubmitOutput],
):
    """
    Submit an image-to-video generation task using the wan2.6-i2v model.

    This tool only submits the asynchronous task; the generated video must
    be retrieved separately via a fetch tool using the returned ``task_id``.
    """

    name: str = "modelstudio_image_to_video_wan26_submit_task"
    description: str = (
        "[版本: wan2.6] 通义万相图生视频模型(wan2.6-i2v)异步任务提交工具。基于单张首帧图像和文本提示,生成一段流畅的有声视频。\n"  # noqa
        "支持视频时长:5秒、10秒或15秒;分辨率:720P、1080P;支持自动配音或传入自定义音频,实现音画同步。\n"
        "独家支持多镜头叙事:可生成包含多个镜头的视频,并在镜头切换时保持主体一致性。\n"
        "提供特效模板(如“魔法悬浮”、“气球膨胀”),适用于创意视频制作、娱乐特效展示等场景。\n"
    )

    @trace(trace_type="AIGC", trace_name="image_to_video_wan26_submit")
    async def arun(
        self,
        args: ImageToVideoWan26SubmitInput,
        **kwargs: Any,
    ) -> ImageToVideoWan26SubmitOutput:
        """Submit the generation task and return its id and status.

        Args:
            args: Validated submission parameters.
            **kwargs: May carry ``trace_event``, ``model_name`` and the
                API-key override consumed by ``get_api_key``.

        Raises:
            ValueError: If no valid DASHSCOPE_API_KEY is available.
            RuntimeError: If the HTTP call fails or the task is reported
                as FAILED/CANCELED immediately on submission.
        """
        trace_event = kwargs.pop("trace_event", None)
        request_id = TracingUtil.get_request_id()

        try:
            api_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        # Model name can be overridden per-call, then via env var,
        # falling back to the wan2.6 default.
        model_name = kwargs.get(
            "model_name",
            os.getenv("IMAGE_TO_VIDEO_MODEL_NAME", "wan2.6-i2v"),
        )

        # Build the optional-parameters dict: booleans/ints are forwarded
        # whenever explicitly set (``is not None`` keeps False/0), while
        # string options use truthiness so empty strings are dropped.
        parameters = {}
        if args.audio is not None:
            parameters["audio"] = args.audio
        if args.resolution:
            parameters["resolution"] = args.resolution
        if args.duration is not None:
            parameters["duration"] = args.duration
        if args.prompt_extend is not None:
            parameters["prompt_extend"] = args.prompt_extend
        if args.shot_type:
            parameters["shot_type"] = args.shot_type
        if args.watermark is not None:
            parameters["watermark"] = args.watermark
        if args.seed is not None:
            parameters["seed"] = args.seed
        aio_video_synthesis = AioVideoSynthesis()

        # The DashScope SDK takes the first-frame image as ``img_url``
        # (not ``input``) — NOTE(review): confirm against the SDK docs.
        response = await aio_video_synthesis.async_call(
            model=model_name,
            api_key=api_key,
            img_url=args.image_url,
            prompt=args.prompt,
            negative_prompt=args.negative_prompt,
            audio_url=args.audio_url,
            template=args.template,
            **parameters,
        )

        if trace_event:
            trace_event.on_log(
                "",
                **{
                    "step_suffix": "results",
                    "payload": {
                        "request_id": request_id,
                        "submit_task": response,
                    },
                },
            )

        # Treat HTTP failure, missing output, or an immediately terminal
        # failure state as a submission error.
        if (
            response.status_code != HTTPStatus.OK
            or not response.output
            or response.output.task_status in ["FAILED", "CANCELED"]
        ):
            raise RuntimeError(
                f"Failed to submit image-to-video task: {response}",
            )

        # Prefer the service-issued request id, then the tracing id,
        # then a locally generated UUID as a last resort.
        request_id = response.request_id or request_id or str(uuid.uuid4())

        return ImageToVideoWan26SubmitOutput(
            request_id=request_id,
            task_id=response.output.task_id,
            task_status=response.output.task_status,
        )


# ========== Fetch tools: result polling for wan2.6-i2v tasks ==========


class ImageToVideoWan26FetchInput(BaseModel):  # noqa
    """Input for polling the result of a wan2.6-i2v video task."""

    # Task id returned by the submit tool.
    task_id: str = Field(
        title="Task ID",
        description="要查询的视频生成任务ID。",
    )
    # MCP-only HTTP request context; never produced by the model.
    ctx: Optional[Context] = Field(
        default=None,
        description="HTTP request context containing headers for mcp only, "
        "don't generate it",
    )


class ImageToVideoWan26FetchOutput(BaseModel):
    """Result payload returned when polling a wan2.6-i2v video task."""

    # Public URL of the generated MP4; meaningful once SUCCEEDED.
    video_url: str = Field(
        title="Video URL",
        description="生成视频的公网可访问URL(MP4格式)。",
    )
    # Echo of the queried task id.
    task_id: str = Field(
        title="Task ID",
        description="任务ID,与输入一致。",
    )
    # Final (or current) task state reported by the service.
    task_status: str = Field(
        title="Task Status",
        description="任务最终状态,成功时为 SUCCEEDED。",
    )
    # Request id for log correlation.
    request_id: Optional[str] = Field(
        default=None,
        title="Request ID",
        description="请求ID,用于追踪。",
    )


class ImageToVideoWan26Fetch(
    Tool[ImageToVideoWan26FetchInput, ImageToVideoWan26FetchOutput],
):
    """Poll the result of a wan2.6-i2v image-to-video task by task id."""

    name: str = "modelstudio_image_to_video_wan26_fetch_result"
    description: str = (
        "查询通义万相 wan2.6-i2v 图生视频任务的结果。"
        "输入 Task ID,返回生成的视频 URL 及任务状态。"
        "请在提交任务后轮询此接口,直到任务状态变为 SUCCEEDED。"
    )

    @trace(trace_type="AIGC", trace_name="image_to_video_wan26_fetch")
    async def arun(
        self,
        args: ImageToVideoWan26FetchInput,
        **kwargs: Any,
    ) -> ImageToVideoWan26FetchOutput:
        """Fetch the task result once (callers are expected to poll).

        Raises:
            ValueError: If no valid DASHSCOPE_API_KEY is available.
            RuntimeError: If the HTTP call fails or the task ended in
                FAILED/CANCELED.
        """
        trace_event = kwargs.pop("trace_event", None)
        request_id = TracingUtil.get_request_id()

        try:
            api_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        aio_video_synthesis = AioVideoSynthesis()

        # Single status lookup by task id via the DashScope SDK.
        response = await aio_video_synthesis.fetch(
            api_key=api_key,
            task=args.task_id,
        )

        if trace_event:
            trace_event.on_log(
                "",
                **{
                    "step_suffix": "results",
                    "payload": {
                        "request_id": response.request_id,
                        "fetch_result": response,
                    },
                },
            )

        # Reject HTTP errors, missing output, and terminal failure states.
        # NOTE(review): a still-running task (PENDING/RUNNING) passes this
        # guard but may not carry video_url yet, which would fail the
        # output model's validation below — confirm intended behavior.
        if (
            response.status_code != HTTPStatus.OK
            or not response.output
            or response.output.task_status in ["FAILED", "CANCELED"]
        ):
            raise RuntimeError(
                f"Failed to fetch image-to-video result: {response}",
            )

        # Prefer the service-issued request id, then the tracing id,
        # then a locally generated UUID as a last resort.
        request_id = response.request_id or request_id or str(uuid.uuid4())

        return ImageToVideoWan26FetchOutput(
            video_url=response.output.video_url,
            task_id=response.output.task_id,
            task_status=response.output.task_status,
            request_id=request_id,
        )
Loading
Loading