Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions configs/idea2video.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,39 @@ chat_model:
init_args:
model: google/gemini-2.5-flash-lite-preview-09-2025
model_provider: openai
api_key:
api_key:
base_url: https://openrouter.ai/api/v1


image_generator:
class_path: tools.ImageGeneratorNanobananaGoogleAPI
init_args:
api_key:
api_key:


video_generator:
class_path: tools.VideoGeneratorVeoGoogleAPI
init_args:
api_key:
api_key:


working_dir: .working_dir/idea2video


# Optional: Custom assets to use as references during video generation
# These images/videos will be available as reference materials for the AI
# to use when generating scenes
assets:
sample_images: []
# Use absolute paths or paths relative to where you run the script from
# Write clear, detailed descriptions - the AI uses these to decide when to use each asset
# Example:
# - path: /path/to/sample_image1.png
# description: "A cartoon-style forest scene with tall trees"
# - path: /path/to/sample_image2.png
# description: "A close-up of a friendly dog character"

sample_videos: []
# Example:
# - path: /path/to/sample_video1.mp4
# description: "Camera pan across a sunny park"
7 changes: 7 additions & 0 deletions pipelines/idea2video_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ def __init__(
image_generator: str,
video_generator: str,
working_dir: str,
custom_assets: Optional[Dict[str, List[Dict[str, str]]]] = None,
):
self.chat_model = chat_model
self.image_generator = image_generator
self.video_generator = video_generator
self.working_dir = working_dir
self.custom_assets = custom_assets or {"sample_images": [], "sample_videos": []}
os.makedirs(self.working_dir, exist_ok=True)

self.screenwriter = Screenwriter(chat_model=self.chat_model)
Expand Down Expand Up @@ -50,11 +52,15 @@ def init_from_config(
video_generator_args = config["video_generator"]["init_args"]
video_generator = video_generator_cls(**video_generator_args)

# Load custom assets if provided
custom_assets = config.get("assets", {"sample_images": [], "sample_videos": []})

return cls(
chat_model=chat_model,
image_generator=image_generator,
video_generator=video_generator,
working_dir=config["working_dir"],
custom_assets=custom_assets,
)

async def extract_characters(
Expand Down Expand Up @@ -228,6 +234,7 @@ async def __call__(
image_generator=self.image_generator,
video_generator=self.video_generator,
working_dir=scene_working_dir,
custom_assets=self.custom_assets,
)
final_video_path = await script2video_pipeline(
script=scene_script,
Expand Down
32 changes: 32 additions & 0 deletions pipelines/script2video_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ def __init__(
image_generator,
video_generator,
working_dir: str,
custom_assets: Optional[Dict[str, List[Dict[str, str]]]] = None,
):

self.chat_model = chat_model
self.image_generator = image_generator
self.video_generator = video_generator
self.custom_assets = custom_assets or {"sample_images": [], "sample_videos": []}

self.character_extractor = CharacterExtractor(chat_model=self.chat_model)
self.character_portraits_generator = CharacterPortraitsGenerator(image_generator=self.image_generator)
Expand All @@ -44,7 +46,26 @@ def __init__(
self.working_dir = working_dir
os.makedirs(self.working_dir, exist_ok=True)

def _get_custom_asset_pairs(self) -> List[Tuple[str, str]]:
"""
Convert custom assets from config into (path, description) pairs.
Returns a list of tuples suitable for available_image_path_and_text_pairs.
"""
asset_pairs = []

# Add sample images
for asset in self.custom_assets.get("sample_images", []):
if "path" in asset and "description" in asset:
# Verify the file exists
if os.path.exists(asset["path"]):
asset_pairs.append((asset["path"], asset["description"]))
else:
print(f"⚠️ Warning: Custom asset image not found: {asset['path']}")

# Note: sample_videos could be added here in the future if needed
# For now, we focus on sample_images as they're used as reference images

return asset_pairs

@classmethod
def init_from_config(
Expand All @@ -67,11 +88,15 @@ def init_from_config(
video_generator_args = config["video_generator"]["init_args"]
video_generator = video_generator_cls(**video_generator_args)

# Load custom assets if provided
custom_assets = config.get("assets", {"sample_images": [], "sample_videos": []})

return cls(
chat_model=chat_model,
image_generator=image_generator,
video_generator=video_generator,
working_dir=config["working_dir"],
custom_assets=custom_assets,
)

async def __call__(
Expand Down Expand Up @@ -192,6 +217,9 @@ async def generate_frames_for_single_camera(
print(f"🖼️ Starting first_frame generation for shot {first_shot_idx}...")
available_image_path_and_text_pairs = []

# Add custom assets from config
available_image_path_and_text_pairs.extend(self._get_custom_asset_pairs())

for character_idx in shot_descriptions[first_shot_idx].ff_vis_char_idxs:
identifier_in_scene = characters[character_idx].identifier_in_scene
registry_item = character_portraits_registry[identifier_in_scene]
Expand Down Expand Up @@ -363,6 +391,10 @@ async def generate_frame_for_single_shot(
else:
print(f"🖼️ Starting {frame_type} generation for shot {shot_idx}...")
available_image_path_and_text_pairs = []

# Add custom assets from config
available_image_path_and_text_pairs.extend(self._get_custom_asset_pairs())

for visible_character in visible_characters:
identifier_in_scene = visible_character.identifier_in_scene
registry_item = character_portraits_registry[identifier_in_scene]
Expand Down
37 changes: 36 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ https://github.com/user-attachments/assets/5bad46b2-8276-4e1d-9480-3522640744b2
- [🔮 Demos](#Video-Demos-Generated-from-Scratch)
- [🏗️ Architecture](#️-architecture)
- [🚀 Quick Start](#quick-start)
- [🎨 Custom Assets Configuration](#custom-assets-configuration)

---
## 💡Key Features
Expand Down Expand Up @@ -391,7 +392,7 @@ Parallel processing for sequential shots captured from the same camera enables h
### 🖥️ **Environment**

```
OS: Linux, Windows
OS: Linux, Windows, macOS
```

### 📥 **Clone and Install**
Expand Down Expand Up @@ -460,6 +461,40 @@ style = "Animate Style"
```


---

## 🎨 Custom Assets Configuration

Custom assets are configured under the `assets` section of your config file
(e.g. `configs/idea2video.yaml` or `configs/script2video.yaml`). This allows you to provide
custom sample images and videos that will be used as reference materials during the video
generation process.

When you add sample images to the configuration:

1. The images are loaded at pipeline initialization
2. They become available as reference materials for the `ReferenceImageSelector` agent
3. The AI can choose to use these images when generating frames for scenes
4. Your custom images are added alongside character portraits and generated scene images

**Use cases for sample images:**
- Specific art styles you want to reference
- Background scenes or environments
- Object references (vehicles, buildings, props)
- Color palette references
- Composition examples

### Sample Videos

Sample videos are currently loaded but not yet fully integrated into the generation
pipeline. Future updates may enable using video frames as additional reference materials.

### Tips for Best Results

1. **Match Your Style**: Choose reference images that match the style parameter you're using (e.g., "Cartoon", "Realistic", etc.)
2. **High Quality**: Use high-resolution images (the pipeline works with 1600x900 frames)
3. **Relevant Descriptions**: Be specific in descriptions - mention colors, mood, composition, and key elements
4. **Variety**: Include different types of references (environments, objects, compositions) for more flexibility

---

**🌟 If this project helps you, please give us a Star!**
Expand Down