Skip to content

Commit 322fb1e

Browse files
committed
refactor: backend main & search for namespaces
1 parent 2e31081 commit 322fb1e

File tree

2 files changed

+26
-18
lines changed

2 files changed

+26
-18
lines changed

backend/main.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import logging
3-
from fastapi import UploadFile, HTTPException
3+
from fastapi import UploadFile, HTTPException, Form
44
import modal
55

66
# Constants
@@ -110,16 +110,16 @@ def startup(self):
110110
print(f"[Container] Started at {self.start_time.isoformat()}")
111111

112112
@modal.method()
113-
async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
114-
logger.info(f"[Job {job_id}] Processing started: {filename} ({len(video_bytes)} bytes)")
113+
async def process_video(self, video_bytes: bytes, filename: str, job_id: str, namespace: str = ""):
114+
logger.info(f"[Job {job_id}] Processing started: {filename} ({len(video_bytes)} bytes) | namespace='{namespace}'")
115115

116116
try:
117117
# Upload original video to R2 bucket
118118
# TODO: run this upload in parallel with processing and provide the URL once it is done
119119
success, hashed_identifier = self.r2_connector.upload_video(
120120
video_data=video_bytes,
121121
filename=filename,
122-
# user_id="user1" # Specify user ID once we have user management
122+
namespace=namespace
123123
)
124124
if not success:
125125
raise Exception(f"Failed to upload video to R2 storage: {hashed_identifier}")
@@ -175,7 +175,7 @@ async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
175175
self.pinecone_connector.upsert_chunk(
176176
chunk_id=chunk['chunk_id'],
177177
chunk_embedding=embedding.numpy(),
178-
namespace="",
178+
namespace=namespace,
179179
metadata=chunk['metadata']
180180
)
181181

@@ -244,12 +244,13 @@ async def status(self, job_id: str):
244244
return job_data
245245

246246
@modal.fastapi_endpoint(method="POST")
247-
async def upload(self, file: UploadFile = None):
247+
async def upload(self, file: UploadFile = None, namespace: str = Form("")):
248248
"""
249249
Handle video file upload and start background processing.
250250
251251
Args:
252252
file (UploadFile): The uploaded video file.
253+
namespace (str, optional): Namespace for Pinecone and R2 storage (default: "")
253254
254255
Returns:
255256
dict: Contains job_id, filename, content_type, size_bytes, status, and message.
@@ -264,11 +265,12 @@ async def upload(self, file: UploadFile = None):
264265
"filename": file.filename,
265266
"status": "processing",
266267
"size_bytes": file_size,
267-
"content_type": file.content_type
268+
"content_type": file.content_type,
269+
"namespace": namespace
268270
})
269271

270272
# Spawn background processing (non-blocking - returns immediately)
271-
self.process_video.spawn(contents, file.filename, job_id)
273+
self.process_video.spawn(contents, file.filename, job_id, namespace)
272274

273275
return {
274276
"job_id": job_id,
@@ -281,12 +283,13 @@ async def upload(self, file: UploadFile = None):
281283

282284

283285
@modal.fastapi_endpoint(method="GET")
284-
async def search(self, query: str):
286+
async def search(self, query: str, namespace: str = ""):
285287
"""
286288
Search endpoint - accepts a text query and returns semantic search results.
287289
288290
Args:
289291
- query (str): The search query string (required)
292+
- namespace (str, optional): Namespace for Pinecone search (default: "")
290293
- top_k: not a request parameter; the number of results returned is currently fixed at 10
291294
292295
Returns: dict with 'query', 'results', and 'timing'.
@@ -300,16 +303,17 @@ async def search(self, query: str):
300303
raise HTTPException(status_code=400, detail="Missing 'query' parameter")
301304

302305
top_k = 10
303-
logger.info(f"[Search] Query: '{query}' | top_k={top_k}")
306+
logger.info(f"[Search] Query: '{query}' | namespace='{namespace}' | top_k={top_k}")
304307

305308
# Execute semantic search
306309
results = self.searcher.search(
307310
query=query,
308-
top_k=top_k
311+
top_k=top_k,
312+
namespace=namespace
309313
)
310-
314+
311315
t_done = time.perf_counter()
312-
316+
313317
# Log chunk-level results only
314318
logger.info(f"[Search] Found {len(results)} chunk-level results in {t_done - t_start:.3f}s")
315319

backend/search/searcher.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,16 @@ def device(self) -> str:
6161
def search(
6262
self,
6363
query: str,
64-
top_k: int = 5
64+
top_k: int = 5,
65+
namespace: Optional[str] = None
6566
) -> List[Dict[str, Any]]:
6667
"""
6768
Search for semantically similar content.
6869
6970
Args:
7071
query: Natural language search query
7172
top_k: Number of results to return (default: 5)
73+
namespace: Namespace to search. If None (the default), self.namespace is used; any other value, including an empty string, is used as given
7274
7375
Returns:
7476
List of matches with scores and metadata, sorted by similarity
@@ -79,15 +81,17 @@ def search(
7981
print(f"Score: {result['score']}")
8082
print(f"Metadata: {result['metadata']}")
8183
"""
82-
logger.info(f"Searching for: '{query}' (top_k={top_k})")
83-
84+
# Use provided namespace or fall back to default
85+
search_namespace = namespace if namespace is not None else self.namespace
86+
logger.info(f"Searching for: '{query}' (top_k={top_k}, namespace='{search_namespace}')")
87+
8488
# Generate query embedding
8589
query_embedding = self.embedder.embed_text(query)
86-
90+
8791
# Query Pinecone chunks in the resolved namespace (no metadata filters are passed here)
8892
matches = self.connector.query_chunks(
8993
query_embedding=query_embedding,
90-
namespace=self.namespace,
94+
namespace=search_namespace,
9195
top_k=top_k
9296
)
9397

0 commit comments

Comments
 (0)