Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions configs/idea2video.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,39 @@ chat_model:
init_args:
model: google/gemini-2.5-flash-lite-preview-09-2025
model_provider: openai
api_key:
api_key:
base_url: https://openrouter.ai/api/v1


image_generator:
class_path: tools.ImageGeneratorNanobananaGoogleAPI
init_args:
api_key:
api_key:


video_generator:
class_path: tools.VideoGeneratorVeoGoogleAPI
init_args:
api_key:
api_key:


working_dir: .working_dir/idea2video


# Optional: Custom assets to use as references during video generation
# These images/videos will be available as reference materials for the AI
# to use when generating scenes
assets:
sample_images: []
# Use absolute paths or paths relative to where you run the script from
# Write clear, detailed descriptions - the AI uses these to decide when to use each asset
# Example:
# - path: /path/to/sample_image1.png
# description: "A cartoon-style forest scene with tall trees"
# - path: /path/to/sample_image2.png
# description: "A close-up of a friendly dog character"

sample_videos: []
# Example:
# - path: /path/to/sample_video1.mp4
# description: "Camera pan across a sunny park"
7 changes: 7 additions & 0 deletions pipelines/idea2video_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ def __init__(
image_generator: str,
video_generator: str,
working_dir: str,
custom_assets: Optional[Dict[str, List[Dict[str, str]]]] = None,
):
self.chat_model = chat_model
self.image_generator = image_generator
self.video_generator = video_generator
self.working_dir = working_dir
self.custom_assets = custom_assets or {"sample_images": [], "sample_videos": []}
os.makedirs(self.working_dir, exist_ok=True)

self.screenwriter = Screenwriter(chat_model=self.chat_model)
Expand Down Expand Up @@ -50,11 +52,15 @@ def init_from_config(
video_generator_args = config["video_generator"]["init_args"]
video_generator = video_generator_cls(**video_generator_args)

# Load custom assets if provided
custom_assets = config.get("assets", {"sample_images": [], "sample_videos": []})

return cls(
chat_model=chat_model,
image_generator=image_generator,
video_generator=video_generator,
working_dir=config["working_dir"],
custom_assets=custom_assets,
)

async def extract_characters(
Expand Down Expand Up @@ -228,6 +234,7 @@ async def __call__(
image_generator=self.image_generator,
video_generator=self.video_generator,
working_dir=scene_working_dir,
custom_assets=self.custom_assets,
)
final_video_path = await script2video_pipeline(
script=scene_script,
Expand Down
32 changes: 32 additions & 0 deletions pipelines/script2video_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ def __init__(
image_generator,
video_generator,
working_dir: str,
custom_assets: Optional[Dict[str, List[Dict[str, str]]]] = None,
):

self.chat_model = chat_model
self.image_generator = image_generator
self.video_generator = video_generator
self.custom_assets = custom_assets or {"sample_images": [], "sample_videos": []}

self.character_extractor = CharacterExtractor(chat_model=self.chat_model)
self.character_portraits_generator = CharacterPortraitsGenerator(image_generator=self.image_generator)
Expand All @@ -44,7 +46,26 @@ def __init__(
self.working_dir = working_dir
os.makedirs(self.working_dir, exist_ok=True)

def _get_custom_asset_pairs(self) -> List[Tuple[str, str]]:
"""
Convert custom assets from config into (path, description) pairs.
Returns a list of tuples suitable for available_image_path_and_text_pairs.
"""
asset_pairs = []

# Add sample images
for asset in self.custom_assets.get("sample_images", []):
if "path" in asset and "description" in asset:
# Verify the file exists
if os.path.exists(asset["path"]):
asset_pairs.append((asset["path"], asset["description"]))
else:
print(f"⚠️ Warning: Custom asset image not found: {asset['path']}")

# Note: sample_videos could be added here in the future if needed
# For now, we focus on sample_images as they're used as reference images

return asset_pairs

@classmethod
def init_from_config(
Expand All @@ -67,11 +88,15 @@ def init_from_config(
video_generator_args = config["video_generator"]["init_args"]
video_generator = video_generator_cls(**video_generator_args)

# Load custom assets if provided
custom_assets = config.get("assets", {"sample_images": [], "sample_videos": []})

return cls(
chat_model=chat_model,
image_generator=image_generator,
video_generator=video_generator,
working_dir=config["working_dir"],
custom_assets=custom_assets,
)

async def __call__(
Expand Down Expand Up @@ -192,6 +217,9 @@ async def generate_frames_for_single_camera(
print(f"🖼️ Starting first_frame generation for shot {first_shot_idx}...")
available_image_path_and_text_pairs = []

# Add custom assets from config
available_image_path_and_text_pairs.extend(self._get_custom_asset_pairs())

for character_idx in shot_descriptions[first_shot_idx].ff_vis_char_idxs:
identifier_in_scene = characters[character_idx].identifier_in_scene
registry_item = character_portraits_registry[identifier_in_scene]
Expand Down Expand Up @@ -363,6 +391,10 @@ async def generate_frame_for_single_shot(
else:
print(f"🖼️ Starting {frame_type} generation for shot {shot_idx}...")
available_image_path_and_text_pairs = []

# Add custom assets from config
available_image_path_and_text_pairs.extend(self._get_custom_asset_pairs())

for visible_character in visible_characters:
identifier_in_scene = visible_character.identifier_in_scene
registry_item = character_portraits_registry[identifier_in_scene]
Expand Down
37 changes: 36 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ https://github.com/user-attachments/assets/5bad46b2-8276-4e1d-9480-3522640744b2
- [🔮 Demos](#Video-Demos-Generated-from-Scratch)
- [🏗️ Architecture](#️-architecture)
- [🚀 Quick Start](#quick-start)
- [🎨 Custom Assets Configuration](#custom-assets-configuration)

---
## 💡Key Features
Expand Down Expand Up @@ -391,7 +392,7 @@ Parallel processing for sequential shots captured from the same camera enables h
### 🖥️ **Environment**

```
OS: Linux, Windows
OS: Linux, Windows, macOS
```

### 📥 **Clone and Install**
Expand Down Expand Up @@ -460,6 +461,40 @@ style = "Animate Style"
```


---

## 🎨 Custom Assets Configuration

Custom assets are configured under the `assets` section of your config file
(e.g. `configs/idea2video.yaml` or `configs/script2video.yaml`). This allows you to provide
custom sample images and videos that will be used as reference materials during the video
generation process.

When you add sample images to the configuration:

1. The images are loaded at pipeline initialization
2. They become available as reference materials for the `ReferenceImageSelector` agent
3. The AI can choose to use these images when generating frames for scenes
4. Your custom images are added alongside character portraits and generated scene images

**Use cases for sample images:**
- Specific art styles you want to reference
- Background scenes or environments
- Object references (vehicles, buildings, props)
- Color palette references
- Composition examples

### Sample Videos

Sample videos are currently loaded but not yet fully integrated into the generation
pipeline. Future updates may enable using video frames as additional reference materials.

### Tips for Best Results

1. **Match Your Style**: Choose reference images that match the style parameter you're using (e.g., "Cartoon", "Realistic", etc.)
2. **High Quality**: Use high-resolution images (the pipeline works with 1600x900 frames)
3. **Relevant Descriptions**: Be specific in descriptions - mention colors, mood, composition, and key elements
4. **Variety**: Include different types of references (environments, objects, compositions) for more flexibility

---

**🌟 If this project helps you, please give us a Star!**
Expand Down