@@ -48,6 +48,7 @@ def startup(self):
4848
4949 # Import classes here
5050 from preprocessing .preprocessor import Preprocessor
51+ from embeddings .embedder import VideoEmbedder
5152 from database .pinecone_connector import PineconeConnector
5253 from database .job_store_connector import JobStoreConnector
5354 from database .r2_connector import R2Connector
@@ -81,6 +82,7 @@ def startup(self):
8182 # Instantiate classes
8283
8384 self .preprocessor = Preprocessor (min_chunk_duration = 1.0 , max_chunk_duration = 10.0 , scene_threshold = 13.0 )
85+ self .video_embedder = VideoEmbedder ()
8486 self .pinecone_connector = PineconeConnector (api_key = PINECONE_API_KEY , index_name = PINECONE_CHUNKS_INDEX )
8587 self .job_store = JobStoreConnector (dict_name = "clipabit-jobs" )
8688 self .r2_connector = R2Connector (account_id = R2_ACCOUNT_ID ,
@@ -128,11 +130,45 @@ async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
128130 # Prepare chunk details for response (without frame arrays)
129131 chunk_details = []
130132 for chunk in processed_chunks :
133+ embedding = self .video_embedder ._generate_clip_embedding (chunk ["frames" ], num_frames = 8 )
134+
135+ logger .info (f"[Job { job_id } ] Generated CLIP embedding for chunk { chunk ['chunk_id' ]} " )
136+ logger .info (f"[Job { job_id } ] Upserting embedding for chunk { chunk ['chunk_id' ]} to Pinecone..." )
137+
138+
139+ # 1. Handle timestamp_range (List of Numbers -> Two Numbers)
140+ if 'timestamp_range' in chunk ['metadata' ]:
141+ start_time , end_time = chunk ['metadata' ].pop ('timestamp_range' )
142+ chunk ['metadata' ]['start_time_s' ] = start_time
143+ chunk ['metadata' ]['end_time_s' ] = end_time
144+
145+ # 2. Handle file_info (Nested Dict -> Flat Keys)
146+ if 'file_info' in chunk ['metadata' ]:
147+ file_info = chunk ['metadata' ].pop ('file_info' )
148+ for key , value in file_info .items ():
149+ chunk ['metadata' ][f'file_{ key } ' ] = value
150+
151+ # 3. Final check: drop metadata keys whose value is None.
152+ # This step is required, not optional: Pinecone rejects keys with null values.
153+ keys_to_delete = [k for k , v in chunk ['metadata' ].items () if v is None ]
154+ for k in keys_to_delete :
155+ del chunk ['metadata' ][k ]
156+
157+
158+ self .pinecone_connector .upsert_chunk (
159+ chunk_id = chunk ['chunk_id' ],
160+ chunk_embedding = embedding .numpy (),
161+ namespace = "test" ,
162+ metadata = chunk ['metadata' ]
163+ )
164+
131165 chunk_details .append ({
132166 "chunk_id" : chunk ['chunk_id' ],
133167 "metadata" : chunk ['metadata' ],
134- "memory_mb" : chunk ['memory_mb' ]
168+ "memory_mb" : chunk ['memory_mb' ],
135169 })
170+
171+ # TODO: Upload processed data to S3
136172
137173 result = {
138174 "job_id" : job_id ,
@@ -143,7 +179,7 @@ async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
143179 "total_frames" : total_frames ,
144180 "total_memory_mb" : total_memory ,
145181 "avg_complexity" : avg_complexity ,
146- "chunk_details" : chunk_details
182+ "chunk_details" : chunk_details ,
147183 }
148184
149185 logger .info (f"[Job { job_id } ] Finished processing { filename } " )
@@ -219,7 +255,7 @@ async def upload(self, file: UploadFile = None):
219255 "message" : "Video uploaded successfully, processing in background"
220256 }
221257
222- @modal .fastapi_endpoint (method = "POST " )
258+ @modal .fastapi_endpoint (method = "GET " )
223259 async def search (self , query : str ):
224260 """Search endpoint - accepts a text query and returns semantic search results."""
225261 logger .info (f"[Search] Query: { query } " )
0 commit comments