Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# 阿里云 DashScope API Key (用于通义千问等模型)
DASHSCOPE_API_KEY=your_dashscope_api_key_here

# DashScope 模型请求端点
# 视频生成端点
DASHSCOPE_VIDEO_CREATE_URL=https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis
# 文生图端点
DASHSCOPE_IMAGE_T2I_URL=https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation
# 图生图端点
DASHSCOPE_IMAGE_I2I_URL=https://dashscope.aliyuncs.com/api/v1/services/aigc/image-generation/generation
# 任务查询端点(必须包含 {task_id})
DASHSCOPE_TASK_QUERY_URL_TEMPLATE=https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}

# 阿里云 Access Key (用于 OSS、视频超分等服务)
ALIBABA_CLOUD_ACCESS_KEY_ID=your_aliyun_access_key_id_here
ALIBABA_CLOUD_ACCESS_KEY_SECRET=your_aliyun_access_key_secret_here
Expand All @@ -13,3 +23,37 @@ OSS_BASE_PATH=comic_gen/
# API 服务配置
API_HOST=0.0.0.0
API_PORT=8000

# ===============================
# 模型名称配置
# ===============================
# Wanx 默认模型
WANX_T2V_MODEL_NAME_DEFAULT=wan2.5-t2v-preview
WANX_I2V_MODEL_NAME_DEFAULT=wan2.6-i2v
WANX_R2V_MODEL_NAME_DEFAULT=wan2.6-r2v

# Wanx 走 HTTP 接口的模型列表(逗号分隔)
WANX_HTTP_I2V_MODEL_NAMES=wan2.6-i2v,wan2.5-i2v
WANX_HTTP_R2V_MODEL_NAMES=wan2.6-r2v

# Wanx 图像模型
WANX_IMAGE_T2I_MODEL_NAME_DEFAULT=wan2.6-t2i
WANX_IMAGE_I2I_MODEL_NAME_DEFAULT=wan2.6-image
WANX_IMAGE_HTTP_T2I_MODEL_NAMES=wan2.6-t2i
WANX_IMAGE_HTTP_I2I_MODEL_NAMES=wan2.6-image
WANX_IMAGE_FOUR_REF_MODELS=wan2.6-image

# LLM 各环节模型
LLM_PARSE_NOVEL_MODEL_NAME=qwen-max
LLM_STORYBOARD_ANALYSIS_MODEL_NAME=qwen-max
LLM_STYLE_RECOMMEND_MODEL_NAME=qwen-plus
LLM_STORYBOARD_POLISH_MODEL_NAME=qwen-plus
LLM_VIDEO_POLISH_MODEL_NAME=qwen-plus
LLM_R2V_POLISH_MODEL_NAME=qwen-plus

# 其他模型默认名称 / URL
QWEN_VL_MODEL_NAME_DEFAULT=qwen-vl-plus
DOUBAO_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
DOUBAO_MODEL_NAME_DEFAULT=doubao-seedance-1-0-pro-fast-251015
KLING_BASE_URL=https://api.klingai.com/v1
KLING_MODEL_NAME_DEFAULT=kling-v2-5-turbo
4 changes: 2 additions & 2 deletions src/apps/comic_gen/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .pipeline import ComicGenPipeline
from .models import Script, VideoTask
from .llm import ScriptProcessor
from ...model_request_settings import MODEL_REQUEST_SETTINGS
from ...utils.oss_utils import OSSImageUploader, sign_oss_urls_in_data
from ...utils import setup_logging
from fastapi.responses import JSONResponse
Expand Down Expand Up @@ -666,7 +667,7 @@ class CreateVideoTaskRequest(BaseModel):
prompt_extend: bool = True
negative_prompt: Optional[str] = None
batch_size: int = 1
model: str = "wan2.6-i2v"
model: str = MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default
shot_type: str = "single" # 'single' or 'multi' (only for wan2.6-i2v)
generation_mode: str = "i2v" # 'i2v' (image-to-video) or 'r2v' (reference-to-video)
reference_video_urls: List[str] = [] # Reference video URLs for R2V (max 3)
Expand Down Expand Up @@ -1733,4 +1734,3 @@ async def reorder_frames(script_id: str, request: ReorderFramesRequest):
pipeline._save_data()

return {"status": "success", "message": "Frames reordered", "frame_count": len(script.frames)}

3 changes: 2 additions & 1 deletion src/apps/comic_gen/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from urllib.parse import quote
from .models import Character, Scene, Prop, GenerationStatus, ImageAsset, ImageVariant, MAX_VARIANTS_PER_ASSET
from ...models.image import WanxImageModel
from ...model_request_settings import MODEL_REQUEST_SETTINGS
from ...utils import get_logger
from ...utils.oss_utils import is_object_key

Expand Down Expand Up @@ -138,7 +139,7 @@ def generate_character(self, character: Character, generation_type: str = "all",
effective_generation_prompt = generation_prompt
if ref_image_path:
# Override to I2I model when using reference image
effective_model_name = i2i_model_name or "wan2.6-image"
effective_model_name = i2i_model_name or MODEL_REQUEST_SETTINGS.wanx_image_i2i_model_name_default
logger.debug(f"Reverse generation: Using I2I model {effective_model_name} with reference image")

# Enhance prompt for reverse generation to emphasize reference consistency (only if not already present)
Expand Down
20 changes: 13 additions & 7 deletions src/apps/comic_gen/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@

from .models import Script, Character, Scene, Prop, StoryboardFrame, GenerationStatus
from ...utils import get_logger
from ...model_request_settings import MODEL_REQUEST_SETTINGS

logger = get_logger(__name__)

class ScriptProcessor:
def __init__(self, api_key: str = None):
    """Remember the optional API key and bind one LLM model name per stage.

    Args:
        api_key: Value stored on ``self._api_key``; defaults to ``None``.

    Each ``*_model_name`` attribute is copied from the shared
    ``MODEL_REQUEST_SETTINGS`` object when the processor is constructed.
    """
    settings = MODEL_REQUEST_SETTINGS
    self._api_key = api_key

    # Models used when parsing the novel and analysing it into a storyboard.
    self.parse_novel_model_name = settings.llm_parse_novel_model_name
    self.storyboard_analysis_model_name = settings.llm_storyboard_analysis_model_name

    # Model used for style recommendation.
    self.style_recommend_model_name = settings.llm_style_recommend_model_name

    # Models used by the prompt-polishing steps.
    self.storyboard_polish_model_name = settings.llm_storyboard_polish_model_name
    self.video_polish_model_name = settings.llm_video_polish_model_name
    self.r2v_polish_model_name = settings.llm_r2v_polish_model_name

@property
def api_key(self):
Expand All @@ -40,8 +47,7 @@ def parse_novel(self, title: str, text: str) -> Script:
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
# model='deepseek-v3.2',
model='qwen-max',
model=self.parse_novel_model_name,
prompt=prompt,
result_format='message',
)
Expand Down Expand Up @@ -416,7 +422,7 @@ def analyze_script_for_styles(self, script_text: str) -> List[Dict[str, Any]]:
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
model='qwen-plus',
model=self.style_recommend_model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
Expand Down Expand Up @@ -671,7 +677,7 @@ def analyze_to_storyboard(self, text: str, entities_json: Dict[str, Any]) -> Lis
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
model='qwen-max',
model=self.storyboard_analysis_model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": "请开始生成分镜帧列表,确保覆盖剧本中的所有内容。"}
Expand Down Expand Up @@ -792,7 +798,7 @@ def polish_storyboard_prompt(self, draft_prompt: str, assets: List[Dict[str, Any
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
model='qwen-plus',
model=self.storyboard_polish_model_name,
prompt=system_prompt,
result_format='message',
response_format={'type': 'json_object'}
Expand Down Expand Up @@ -866,7 +872,7 @@ def polish_video_prompt(self, draft_prompt: str) -> Dict[str, str]:
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
model='qwen-plus',
model=self.video_polish_model_name,
messages=[
{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': draft_prompt}
Expand Down Expand Up @@ -964,7 +970,7 @@ def polish_r2v_prompt(self, draft_prompt: str, slots: List[Dict[str, str]]) -> D
dashscope.api_key = self.api_key

response = dashscope.Generation.call(
model='qwen-plus',
model=self.r2v_polish_model_name,
messages=[
{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': draft_prompt}
Expand Down
9 changes: 5 additions & 4 deletions src/apps/comic_gen/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from enum import Enum
import time
from pydantic import BaseModel, Field
from ...model_request_settings import MODEL_REQUEST_SETTINGS

class AspectRatio(str, Enum):
SQUARE = "1:1"
Expand Down Expand Up @@ -76,7 +77,7 @@ class VideoTask(BaseModel):
audio_url: Optional[str] = Field(None, description="URL of generated/uploaded audio")
prompt_extend: bool = Field(True, description="Whether to use prompt extension")
negative_prompt: Optional[str] = Field(None, description="Negative prompt")
model: str = Field("wan2.6-i2v", description="Model used for generation")
model: str = Field(MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default, description="Model used for generation")
shot_type: str = Field("single", description="Shot type: 'single' or 'multi' (only for wan2.6-i2v)")
generation_mode: str = Field("i2v", description="Generation mode: 'i2v' (image-to-video) or 'r2v' (reference-to-video)")
reference_video_urls: List[str] = Field(default_factory=list, description="Reference video URLs for R2V generation (max 3)")
Expand Down Expand Up @@ -221,9 +222,9 @@ class StoryboardFrame(BaseModel):

class ModelSettings(BaseModel):
"""Model selection settings for different generation stages"""
t2i_model: str = Field("wan2.6-t2i", description="Text-to-Image model for Assets")
i2i_model: str = Field("wan2.6-image", description="Image-to-Image model for Storyboard")
i2v_model: str = Field("wan2.6-i2v", description="Image-to-Video model for Motion")
t2i_model: str = Field(MODEL_REQUEST_SETTINGS.wanx_image_t2i_model_name_default, description="Text-to-Image model for Assets")
i2i_model: str = Field(MODEL_REQUEST_SETTINGS.wanx_image_i2i_model_name_default, description="Image-to-Image model for Storyboard")
i2v_model: str = Field(MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default, description="Image-to-Video model for Motion")
character_aspect_ratio: str = Field("9:16", description="Aspect ratio for Characters (9:16, 16:9, 1:1)")
scene_aspect_ratio: str = Field("16:9", description="Aspect ratio for Scenes (9:16, 16:9, 1:1)")
prop_aspect_ratio: str = Field("1:1", description="Aspect ratio for Props (9:16, 16:9, 1:1)")
Expand Down
11 changes: 6 additions & 5 deletions src/apps/comic_gen/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .video import VideoGenerator
from .audio import AudioGenerator
from .export import ExportManager
from ...model_request_settings import MODEL_REQUEST_SETTINGS
from ...utils import get_logger
from ...utils.oss_utils import is_object_key
from ...utils.system_check import get_ffmpeg_path, get_ffmpeg_install_instructions
Expand Down Expand Up @@ -1123,7 +1124,7 @@ def generate_motion_ref(
duration=duration,
created_at=time.time(),
generate_audio=bool(audio_url),
model="wan2.6-i2v",
model=MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default,
generation_mode="i2v" # Image to video (motion reference)
)

Expand Down Expand Up @@ -1273,7 +1274,7 @@ def generate_video(self, script_id: str) -> Script:
self._save_data()
return script

def create_video_task(self, script_id: str, image_url: str, prompt: str, duration: int = 5, seed: int = None, resolution: str = "720p", generate_audio: bool = False, audio_url: str = None, prompt_extend: bool = True, negative_prompt: str = None, model: str = "wan2.6-i2v", frame_id: str = None, shot_type: str = "single", generation_mode: str = "i2v", reference_video_urls: list = None) -> Tuple[Script, str]:
def create_video_task(self, script_id: str, image_url: str, prompt: str, duration: int = 5, seed: int = None, resolution: str = "720p", generate_audio: bool = False, audio_url: str = None, prompt_extend: bool = True, negative_prompt: str = None, model: str = MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default, frame_id: str = None, shot_type: str = "single", generation_mode: str = "i2v", reference_video_urls: list = None) -> Tuple[Script, str]:
"""Creates a new video generation task."""
script = self.get_script(script_id)
if not script:
Expand All @@ -1283,7 +1284,7 @@ def create_video_task(self, script_id: str, image_url: str, prompt: str, duratio

# If R2V mode is selected, use the R2V model
if generation_mode == "r2v":
model = "wan2.6-r2v"
model = MODEL_REQUEST_SETTINGS.wanx_r2v_model_name_default

# Snapshot the input image to ensure consistency
snapshot_url = image_url
Expand Down Expand Up @@ -1687,7 +1688,7 @@ def create_asset_video_task(self, script_id: str, asset_id: str, asset_type: str
prompt=prompt or f"Cinematic shot of {target_asset.name}",
status="pending",
duration=duration,
model="wan2.6-r2v", # Force R2V model
model=MODEL_REQUEST_SETTINGS.wanx_r2v_model_name_default, # Force configured R2V model
created_at=time.time()
)

Expand Down Expand Up @@ -1888,7 +1889,7 @@ def create_asset_video_task(self, script_id: str, asset_id: str, asset_type: str
status="pending",
duration=duration,
resolution=resolution,
model="wan2.6-i2v", # Asset video uses I2V
model=MODEL_REQUEST_SETTINGS.wanx_i2v_model_name_default, # Asset video uses configured I2V model
created_at=time.time()
)

Expand Down
103 changes: 103 additions & 0 deletions src/model_request_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import os
from pathlib import Path
from typing import List

from dotenv import load_dotenv


# This module lives one directory below the project root, so the root is the
# second ancestor of the resolved module path.
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
_ENV_PATH = _PROJECT_ROOT.joinpath(".env")

# Load the project's .env file only when it is present. override=False keeps
# variables that are already set in the process environment.
if _ENV_PATH.exists():
    load_dotenv(dotenv_path=_ENV_PATH, override=False)


def _split_csv(value: str, default: List[str]) -> List[str]:
if not value:
return default
items = [item.strip() for item in value.split(",") if item.strip()]
return items or default


class ModelRequestSettings:
    """Environment-driven endpoints and model names for model requests.

    Every attribute is a class-level value computed once, when this class body
    executes (i.e. when the module is imported). Changing an environment
    variable afterwards does not affect the values stored here.

    String settings fall back to their built-in default when the variable is
    unset *or set to an empty string* (for example ``KEY=`` in ``.env``), which
    matches how the comma-separated list settings already treat empty values.
    """

    # DashScope request URLs
    dashscope_video_create_url: str = (
        os.getenv("DASHSCOPE_VIDEO_CREATE_URL")
        or "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis"
    )
    dashscope_image_t2i_url: str = (
        os.getenv("DASHSCOPE_IMAGE_T2I_URL")
        or "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
    )
    dashscope_image_i2i_url: str = (
        os.getenv("DASHSCOPE_IMAGE_I2I_URL")
        or "https://dashscope.aliyuncs.com/api/v1/services/aigc/image-generation/generation"
    )
    # The task-query template is expected to contain a "{task_id}" placeholder.
    dashscope_task_query_url_template: str = (
        os.getenv("DASHSCOPE_TASK_QUERY_URL_TEMPLATE")
        or "https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
    )

    # Wanx video model names
    wanx_t2v_model_name_default: str = (
        os.getenv("WANX_T2V_MODEL_NAME_DEFAULT") or "wan2.5-t2v-preview"
    )
    wanx_i2v_model_name_default: str = (
        os.getenv("WANX_I2V_MODEL_NAME_DEFAULT") or "wan2.6-i2v"
    )
    wanx_r2v_model_name_default: str = (
        os.getenv("WANX_R2V_MODEL_NAME_DEFAULT") or "wan2.6-r2v"
    )
    # Comma-separated lists of Wanx models that go through the HTTP interface.
    wanx_http_i2v_model_names: List[str] = _split_csv(
        os.getenv("WANX_HTTP_I2V_MODEL_NAMES", ""),
        ["wan2.6-i2v", "wan2.5-i2v"],
    )
    wanx_http_r2v_model_names: List[str] = _split_csv(
        os.getenv("WANX_HTTP_R2V_MODEL_NAMES", ""),
        ["wan2.6-r2v"],
    )

    # Wanx image model names
    wanx_image_t2i_model_name_default: str = (
        os.getenv("WANX_IMAGE_T2I_MODEL_NAME_DEFAULT") or "wan2.6-t2i"
    )
    wanx_image_i2i_model_name_default: str = (
        os.getenv("WANX_IMAGE_I2I_MODEL_NAME_DEFAULT") or "wan2.6-image"
    )
    # NOTE(review): presumably the image models that accept four reference
    # images; confirm against the image model implementation.
    wanx_image_four_ref_models: List[str] = _split_csv(
        os.getenv("WANX_IMAGE_FOUR_REF_MODELS", ""),
        ["wan2.6-image"],
    )
    wanx_image_http_t2i_model_names: List[str] = _split_csv(
        os.getenv("WANX_IMAGE_HTTP_T2I_MODEL_NAMES", ""),
        ["wan2.6-t2i"],
    )
    wanx_image_http_i2i_model_names: List[str] = _split_csv(
        os.getenv("WANX_IMAGE_HTTP_I2I_MODEL_NAMES", ""),
        ["wan2.6-image"],
    )

    # LLM model names, one per pipeline stage
    llm_parse_novel_model_name: str = (
        os.getenv("LLM_PARSE_NOVEL_MODEL_NAME") or "qwen-max"
    )
    llm_storyboard_analysis_model_name: str = (
        os.getenv("LLM_STORYBOARD_ANALYSIS_MODEL_NAME") or "qwen-max"
    )
    llm_style_recommend_model_name: str = (
        os.getenv("LLM_STYLE_RECOMMEND_MODEL_NAME") or "qwen-plus"
    )
    llm_storyboard_polish_model_name: str = (
        os.getenv("LLM_STORYBOARD_POLISH_MODEL_NAME") or "qwen-plus"
    )
    llm_video_polish_model_name: str = (
        os.getenv("LLM_VIDEO_POLISH_MODEL_NAME") or "qwen-plus"
    )
    llm_r2v_polish_model_name: str = (
        os.getenv("LLM_R2V_POLISH_MODEL_NAME") or "qwen-plus"
    )

    # Other model providers
    qwen_vl_model_name_default: str = (
        os.getenv("QWEN_VL_MODEL_NAME_DEFAULT") or "qwen-vl-plus"
    )
    doubao_base_url: str = (
        os.getenv("DOUBAO_BASE_URL") or "https://ark.cn-beijing.volces.com/api/v3"
    )
    doubao_model_name_default: str = (
        os.getenv("DOUBAO_MODEL_NAME_DEFAULT")
        or "doubao-seedance-1-0-pro-fast-251015"
    )
    kling_base_url: str = (
        os.getenv("KLING_BASE_URL") or "https://api.klingai.com/v1"
    )
    kling_model_name_default: str = (
        os.getenv("KLING_MODEL_NAME_DEFAULT") or "kling-v2-5-turbo"
    )


# Shared settings object imported by the rest of the project.
MODEL_REQUEST_SETTINGS = ModelRequestSettings()
8 changes: 6 additions & 2 deletions src/models/doubao.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import base64
from typing import Tuple, Optional
from .base import VideoGenModel
from ..model_request_settings import MODEL_REQUEST_SETTINGS

# Try to import Ark, handle if not installed (though user said they installed it)
try:
Expand All @@ -17,14 +18,17 @@ class DoubaoModel(VideoGenModel):
def __init__(self, config: dict):
super().__init__(config)
self.api_key = os.getenv("ARK_API_KEY")
self.model_name = config.get('params', {}).get('model_name', 'doubao-seedance-1-0-pro-fast-251015')
self.model_name = config.get('params', {}).get(
'model_name', MODEL_REQUEST_SETTINGS.doubao_model_name_default
)
self.base_url = MODEL_REQUEST_SETTINGS.doubao_base_url

if not self.api_key:
logger.warning("ARK_API_KEY not found in environment variables.")

if Ark:
self.client = Ark(
base_url="https://ark.cn-beijing.volces.com/api/v3",
base_url=self.base_url,
api_key=self.api_key
)
else:
Expand Down
Loading