Skip to content

Commit e32fc77

Browse files
authored
Feat/Added the GenerateVideo feature (modelscope#759)
1 parent b605b6b commit e32fc77

22 files changed

Lines changed: 10366 additions & 0 deletions

ms_agent/cli/run.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ def define_args(parsers: argparse.ArgumentParser):
7777
type=str,
7878
default=None,
7979
help='API key for accessing ModelScope api-inference services.')
80+
parser.add_argument(
81+
'--animation_mode',
82+
required=False,
83+
type=str,
84+
choices=['auto', 'human'],
85+
default=None,
86+
help='Animation mode for video_generate project: auto (default) or human.')
8087
parser.set_defaults(func=subparser_func)
8188

8289
def execute(self):
@@ -91,6 +98,10 @@ def execute(self):
9198
self.args.trust_remote_code) # noqa
9299
self.args.load_cache = strtobool(self.args.load_cache)
93100

101+
# Propagate animation mode via environment variable for downstream code agents
102+
if getattr(self.args, 'animation_mode', None):
103+
os.environ['MS_ANIMATION_MODE'] = self.args.animation_mode
104+
94105
config = Config.from_task(self.args.config)
95106

96107
if Config.is_workflow(config):

projects/video_generate/README.md

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Video Generate
2+
3+
一个“AI 科普短视频”生成工作流:支持全自动与人工协同两种模式,依次生成脚本、语音、插画/动画和字幕,并合成为成片。
4+
5+
## 快速检查(必读)
6+
7+
在首次运行前,建议完成以下检查:
8+
9+
1) 运行环境
10+
- Windows / Python 3.10+(推荐)
11+
- 已安装 FFmpeg,并添加到 PATH(ffmpeg -version 可执行)
12+
- Manim 可用(manim -h 可执行)
13+
14+
2) Python 依赖(若未安装)
15+
- 依赖在仓库 requirements 下,或按需安装:moviepy、Pillow、edge-tts、matplotlib 等
16+
17+
3) 资源文件(已随仓库提供)
18+
- 自定义字体与背景音乐:`projects/video_generate/core/asset/`
19+
- `bg_audio.mp3`
20+
- `字小魂扶摇手书(商用需授权).ttf`
21+
22+
4) 可选的 API Key(全自动模式常用)
23+
- MODELSCOPE_API_KEY:用于 ModelScope 模型调用
24+
25+
提示:未设置 Key 也可运行“只合成/人工模式”,但全自动模式可能因缺少 LLM 能力失败。
26+
27+
## 运行方式一:全自动模式(auto)
28+
29+
按主题从零到一自动生成并合成视频:
30+
31+
```powershell
32+
# 可选:设置 API Key
33+
$env:MODELSCOPE_API_KEY="你的ModelScopeKey"
34+
35+
# 运行三步工作流(脚本 → 素材 → 合成)
36+
ms-agent run --config "ms-agent/projects/video_generate/workflow.yaml" --query "主题" --animation_mode auto --trust_remote_code true
37+
```
38+
39+
输出将位于 `ms-agent/projects/video_generate/output/<主题>/`
40+
41+
## 运行方式二:人工模式(human)
42+
43+
适合需要人工把控动画的流程:自动产出“脚本/语音/插画/字幕/占位前景”,然后在“人工工作室”内逐段制作/审批前景动画,最终一键完整合成。
44+
45+
1) 先生成素材(不自动渲染 Manim)
46+
```powershell
47+
ms-agent run --config "ms-agent/projects/video_generate/workflow.yaml" --query "主题" --animation_mode human --trust_remote_code true
48+
```
49+
50+
2) 打开人工工作室(指向上一步生成的主题目录)
51+
```powershell
52+
# 确保将 ms-agent 包目录加入 PYTHONPATH
53+
$env:PYTHONPATH="项目本地目录\ms-agent"
54+
55+
# 以模块方式启动交互式工作室
56+
python -m projects.video_generate.core.human_animation_studio "项目本地目录\ms-agent\projects\video_generate\output\主题"
57+
```
58+
59+
在工作室中:
60+
- 1 查看待制作任务 → 2 开始制作动画 → 生成/改进 Manim 代码 → 创建预览 → 批准动画
61+
- 当所有片段完成后,系统会自动合并前景并执行“完整合成(背景+字幕+音频+前景+音乐)”生成成片
62+
63+
## 运行方式三:只合成(已有素材)
64+
65+
如果目录中已经有 `asset_info.json`(或你只想重新合成):
66+
67+
```powershell
68+
ms-agent run --config "ms-agent/projects/video_generate/workflow_from_assets.yaml" `
69+
--query "项目本地目录\ms-agent\projects\video_generate\output\<主题>\asset_info.json" `
70+
--animation_mode human `
71+
--trust_remote_code true
72+
```
73+
74+
该流程只执行合成,不会重新生成脚本/插画/动画。若存在已审批的透明前景(finals/scene_*_final.mov),将优先使用。
75+
76+
## 目录说明
77+
- `video_agent.py`:三步逻辑的 Agent 封装
78+
- `workflow.yaml`:三步编排;`workflow_from_assets.yaml`:只合成编排
79+
- `core/workflow.py`:主流程;`core/human_animation_studio.py`:人工工作室
80+
- `core/asset/`:字体与背景音乐
81+
- `output/`:运行产物
82+
- `scripts/compose_from_asset_info.py`:从现有 `asset_info.json` 直接合成的辅助脚本
83+
84+
## 常见问题
85+
- 退出码 1:
86+
- 检查是否缺少 MODELSCOPE_API_KEY(全自动模式常见)
87+
- 检查 ffmpeg / manim 是否可执行(PATH)
88+
- 查看终端最后 80 行日志定位具体异常
89+
- 字体/背景不一致:
90+
- 背景由 `create_manual_background` 生成,字体/音乐来自 `core/asset/`;确保该目录可读
91+
- TTS/事件循环冲突:
92+
- 已内置事件循环安全(loop-safe)处理;若仍报错,请重试,并在反馈问题时附上日志尾部
93+
94+
## 许可证与注意
95+
- 自定义字体文件标注为“商用需授权”,请在合规授权范围内使用
96+
- 背景音乐仅作示例,商业使用请更换或确保版权无虞

projects/video_generate/__init__.py

Whitespace-only changes.

projects/video_generate/core/__init__.py

Whitespace-only changes.
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
from enum import Enum
2+
from typing import Dict, List, Optional, Any
3+
from dataclasses import dataclass
4+
import json
5+
import os
6+
7+
class AnimationProductionMode(Enum):
    """How the animation for a segment is produced."""

    # Fully automatic mode.
    AUTO = 'auto'
    # Human-controlled mode.
    HUMAN_CONTROLLED = 'human'
11+
12+
class AnimationStatus(Enum):
    """Lifecycle state of an animation task."""

    # Waiting to be produced.
    PENDING = 'pending'
    # Draft stage.
    DRAFT = 'draft'
    # Preview stage.
    PREVIEW = 'preview'
    # Under revision.
    REVISION = 'revision'
    # Approved.
    APPROVED = 'approved'
    # Production finished.
    COMPLETED = 'completed'
    # Production failed.
    FAILED = 'failed'
21+
22+
@dataclass
class AnimationTask:
    """Data record describing one animation task.

    The first six fields are required; the rest default to "not produced yet"
    values. ``human_feedback`` may be omitted or passed as ``None``; either
    way it is an empty list after construction, and each instance gets its
    own list.
    """

    task_id: str
    segment_index: int
    content: str
    content_type: str
    mode: AnimationProductionMode
    status: AnimationStatus

    # Production artifacts.
    script: Optional[str] = None
    manim_code: Optional[str] = None
    preview_video_path: Optional[str] = None
    final_video_path: Optional[str] = None
    placeholder_path: Optional[str] = None

    # Human-in-the-loop bookkeeping.
    # Declared Optional because the default is None; __post_init__ replaces
    # None with a fresh list so instances never share one.
    human_feedback: Optional[List[str]] = None
    revision_count: int = 0
    max_revisions: int = 5

    # Timing information.
    audio_duration: float = 8.0
    creation_time: Optional[str] = None
    completion_time: Optional[str] = None

    def __post_init__(self):
        """Normalize a missing ``human_feedback`` to a new empty list."""
        if self.human_feedback is None:
            self.human_feedback = []
52+
53+
@dataclass
class PlaceholderConfig:
    """Rendering options for a placeholder frame."""

    # Canvas size in pixels.
    width: int = 1280
    height: int = 720

    # Colours, given as hex strings.
    background_color: str = '#f0f0f0'
    text_color: str = '#333333'

    # Title font size and the title text itself.
    font_size: int = 48
    placeholder_text: str = '动画制作中...'

    # Toggles for the optional content-preview and status lines.
    show_content_preview: bool = True
    show_progress_indicator: bool = True
64+
65+
class AnimationTaskManager:
    """Keep animation tasks in memory and mirror them to a JSON file.

    Tasks live in ``self.tasks`` keyed by task id. The backing file is
    ``animation_tasks.json`` inside ``project_dir``; it is read once on
    construction and rewritten in full by every mutating method.
    """

    def __init__(self, project_dir):
        """Bind the manager to ``project_dir`` and load any saved tasks."""
        self.project_dir = project_dir
        self.tasks_file = os.path.join(project_dir, "animation_tasks.json")
        self.tasks: Dict[str, "AnimationTask"] = {}
        self.load_tasks()

    def create_task(self, segment_index, content, content_type, mode, audio_duration):
        """Create a task for a segment and return its id.

        If a task with the same ``segment_index`` and ``content_type`` already
        exists, no new task is created and the existing id is returned.
        """
        import uuid
        from datetime import datetime

        # Reuse the task for this segment/type pair instead of duplicating it.
        existing_task = self.get_task_by_segment(segment_index, content_type)
        if existing_task:
            print(f"发现已存在的任务: {existing_task.task_id}")
            return existing_task.task_id

        task_id = f"anim_{segment_index}_{uuid.uuid4().hex[:8]}"

        task = AnimationTask(
            task_id=task_id,
            segment_index=segment_index,
            content=content,
            content_type=content_type,
            mode=mode,
            status=AnimationStatus.PENDING,
            audio_duration=audio_duration,
            creation_time=datetime.now().isoformat()
        )

        self.tasks[task_id] = task
        self.save_tasks()
        print(f"创建新任务: {task_id}")
        return task_id

    def get_task_by_segment(self, segment_index, content_type) -> Optional["AnimationTask"]:
        """Return the first task matching segment index and content type, or None."""
        for task in self.tasks.values():
            if task.segment_index == segment_index and task.content_type == content_type:
                return task
        return None

    def update_task_status(self, task_id, status):
        """Set the status of a known task and persist; unknown ids are ignored."""
        if task_id in self.tasks:
            self.tasks[task_id].status = status
            self.save_tasks()

    def add_human_feedback(self, task_id, feedback):
        """Append feedback to a known task, bump its revision count and persist."""
        if task_id in self.tasks:
            self.tasks[task_id].human_feedback.append(feedback)
            self.tasks[task_id].revision_count += 1
            self.save_tasks()

    def get_task(self, task_id) -> Optional["AnimationTask"]:
        """Return the task with this id, or None if there is none."""
        return self.tasks.get(task_id)

    def get_tasks_by_status(self, status) -> List["AnimationTask"]:
        """Return every task whose status equals ``status``."""
        return [task for task in self.tasks.values() if task.status == status]

    def save_tasks(self):
        """Write all tasks to the task file as JSON.

        The target directory is created if it does not exist. The data is
        written to a sibling ``.tmp`` file and then moved over the real file,
        so a failure while writing leaves the previous file untouched.
        """
        from dataclasses import asdict

        tasks_data = {}
        for task_id, task in self.tasks.items():
            task_dict = asdict(task)
            # Enum members are not JSON-serializable; store their values.
            task_dict['mode'] = task.mode.value
            task_dict['status'] = task.status.value
            tasks_data[task_id] = task_dict

        target_dir = os.path.dirname(self.tasks_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        tmp_path = self.tasks_file + ".tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(tasks_data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.tasks_file)

    def load_tasks(self):
        """Load tasks from the task file, if it exists.

        An unreadable or malformed file is reported and leaves ``self.tasks``
        unchanged. A malformed individual record is reported and skipped so
        the remaining records still load.
        """
        if not os.path.exists(self.tasks_file):
            return

        try:
            with open(self.tasks_file, 'r', encoding='utf-8') as f:
                records = json.load(f).items()
        except (OSError, ValueError, AttributeError) as e:
            # ValueError covers JSON and text-decoding errors; AttributeError
            # covers a top-level JSON value that is not an object.
            print(f"加载任务文件失败: {e}")
            return

        for task_id, task_dict in records:
            try:
                fields = dict(task_dict)
                # Restore the enum members from their stored values.
                fields['mode'] = AnimationProductionMode(fields['mode'])
                fields['status'] = AnimationStatus(fields['status'])
                self.tasks[task_id] = AnimationTask(**fields)
            except (KeyError, TypeError, ValueError) as e:
                print(f"加载任务文件失败: {task_id}: {e}")
165+
166+
class PlaceholderGenerator:
    """Render a still "work in progress" frame and turn it into a video clip."""

    # Preferred font file inside the ``asset`` directory next to this module.
    # NOTE(review): the project README lists a differently named bundled font;
    # confirm which file actually ships. Until then, any bundled font is tried
    # before falling back to PIL's default font.
    _FONT_NAME = '字魂龙吟手书(商用需授权).ttf'
    _FONT_EXTENSIONS = ('.ttf', '.ttc', '.otf')

    def __init__(self, config=None):
        """Use ``config`` if given, otherwise a default ``PlaceholderConfig``."""
        self.config = config or PlaceholderConfig()

    @staticmethod
    def _temp_image_path(output_path):
        """Return the path of the intermediate PNG for ``output_path``.

        Only the final extension is replaced, so the result never equals
        ``output_path`` and ``.mov`` inside directory names is left alone.
        """
        return os.path.splitext(output_path)[0] + '_placeholder.png'

    def _font_candidates(self):
        """List font files to try: the preferred one, then other bundled fonts."""
        asset_dir = os.path.join(os.path.dirname(__file__), 'asset')
        preferred = os.path.join(asset_dir, self._FONT_NAME)
        candidates = [preferred]
        try:
            bundled = sorted(os.listdir(asset_dir))
        except OSError:
            bundled = []
        for name in bundled:
            path = os.path.join(asset_dir, name)
            if path != preferred and name.lower().endswith(self._FONT_EXTENSIONS):
                candidates.append(path)
        return candidates

    def _load_font(self, size):
        """Load the first usable candidate font at ``size``, else PIL's default."""
        from PIL import ImageFont

        for path in self._font_candidates():
            if not os.path.exists(path):
                continue
            try:
                return ImageFont.truetype(path, size)
            except (OSError, ValueError, ImportError):
                # Unreadable font, invalid size or missing FreeType support:
                # try the next candidate.
                continue
        return ImageFont.load_default()

    @staticmethod
    def _draw_centered(draw, text, y, font, fill, canvas_width):
        """Draw ``text`` horizontally centered at height ``y``; return its height."""
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        x = (canvas_width - (right - left)) // 2
        draw.text((x, y), text, fill=fill, font=font)
        return bottom - top

    def create_placeholder(self, task, output_path):
        """Create a placeholder video for ``task`` at ``output_path``.

        A frame with the title, an optional content preview and an optional
        status line is rendered to a temporary PNG, which ffmpeg loops for
        ``task.audio_duration`` seconds. The temporary PNG is removed whether
        or not ffmpeg succeeds.

        Returns:
            ``output_path`` on success, ``None`` if ffmpeg could not be run
            or exited with an error.
        """
        from PIL import Image, ImageDraw
        import subprocess

        cfg = self.config
        img = Image.new('RGB', (cfg.width, cfg.height), cfg.background_color)
        draw = ImageDraw.Draw(img)

        # Title, one third of the way down the frame.
        title_y = cfg.height // 3
        title_height = self._draw_centered(
            draw, cfg.placeholder_text, title_y,
            self._load_font(cfg.font_size), cfg.text_color, cfg.width)

        # Content preview, truncated to 50 characters, below the title.
        if cfg.show_content_preview and task.content:
            content_preview = task.content[:50] + "..." if len(task.content) > 50 else task.content
            self._draw_centered(
                draw, content_preview, title_y + title_height + 50,
                self._load_font(cfg.font_size // 2), cfg.text_color, cfg.width)

        # Status line near the bottom of the frame.
        if cfg.show_progress_indicator:
            status_text = f"状态: {task.status.value} | 类型: {task.content_type}"
            self._draw_centered(
                draw, status_text, cfg.height - 100,
                self._load_font(cfg.font_size // 3), cfg.text_color, cfg.width)

        temp_img_path = self._temp_image_path(output_path)
        img.save(temp_img_path)

        try:
            cmd = [
                'ffmpeg', '-y',
                '-f', 'image2', '-loop', '1',
                '-i', temp_img_path,
                '-t', str(task.audio_duration),
                '-pix_fmt', 'yuv420p',
                '-r', '15',
                output_path
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            return output_path
        except (OSError, subprocess.SubprocessError) as e:
            # OSError: ffmpeg not found or not executable;
            # SubprocessError: ffmpeg exited with a non-zero status.
            print(f"创建占位符视频失败: {e}")
            return None
        finally:
            # Best-effort cleanup; a leftover PNG must not mask the result.
            try:
                os.remove(temp_img_path)
            except OSError:
                pass
4.79 MB
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)