Skip to content

Commit 4571873

Browse files
authored
Merge pull request #22 from ClipABit/env-vars-and-api-url-refactor
refactor: env vars and api url logic
2 parents a966b5a + c3a140b commit 4571873

File tree

8 files changed

+131
-50
lines changed

8 files changed

+131
-50
lines changed

backend/README.md

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,23 @@ uv sync
1111
# 2. Authenticate with Modal (first time only - opens browser)
1212
uv run modal token new
1313

14-
# 3. Get Environment variables and populate .env (check docs or reach out to TPMs)
15-
16-
# 4. Start dev server (hot-reloads on file changes)
17-
uv run modal serve main.py
14+
# 3. Configure Modal secrets (dev and prod)
15+
modal secret create dev \
16+
ENVIRONMENT=dev \
17+
PINECONE_API_KEY=your_pinecone_api_key \
18+
R2_ACCOUNT_ID=your_r2_account_id \
19+
R2_ACCESS_KEY_ID=your_r2_access_key_id \
20+
R2_SECRET_ACCESS_KEY=your_r2_secret_access_key
21+
22+
modal secret create prod \
23+
ENVIRONMENT=prod \
24+
PINECONE_API_KEY=your_pinecone_api_key \
25+
R2_ACCOUNT_ID=your_r2_account_id \
26+
R2_ACCESS_KEY_ID=your_r2_access_key_id \
27+
R2_SECRET_ACCESS_KEY=your_r2_secret_access_key
28+
29+
# 4. Start dev server (hot-reloads on file changes, uses "dev" secret)
30+
uv run dev
1831
```
1932

2033
Note: `uv run` automatically uses the virtual environment - no need to activate it manually.
@@ -23,8 +36,9 @@ Note: `uv run` automatically uses the virtual environment - no need to activate
2336

2437
- `main.py` defines a Modal App with a `Server` class
2538
- `/upload` endpoint accepts video files and spawns background processing jobs
26-
27-
[...]
39+
- Environment variables are stored in Modal secrets (no .env files needed)
40+
- `uv run dev` automatically uses "dev" secret for development
41+
- Production deployment is handled via CI/CD or directly with the Modal CLI
2842

2943
## Managing Dependencies
3044

@@ -34,3 +48,11 @@ uv add --dev package-name # Add dev dependency
3448
uv remove package-name # Remove dependency
3549
uv sync --upgrade # Update all packages
3650
```
51+
52+
## Running Tests
53+
54+
```bash
55+
uv run pytest # Run all tests
56+
uv run pytest -v # Verbose output
57+
uv run pytest --cov # With coverage
58+
```

backend/cli.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
import os
21
import subprocess
32

43
def serve():
54
subprocess.run(["modal", "serve", "main.py"])
6-
7-
def deploy():
8-
env = os.environ.copy()
9-
env["ENV"] = "prod"
10-
subprocess.run(["modal", "deploy", "main.py"], env=env)

backend/database/r2_connector.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ def __init__(
2121
account_id: str,
2222
access_key_id: str,
2323
secret_access_key: str,
24-
environment: str = "dev" # dev, test, or prod
24+
environment: str = "dev" # dev or prod
2525
):
2626
"""
2727
Initialize R2 connector with bucket credentials.
28-
28+
2929
Args:
3030
account_id: Cloudflare account ID
3131
access_key_id: R2 access key ID
3232
secret_access_key: R2 secret access key
33-
environment: Environment name (dev/test/prod) which maps directly to bucket name
33+
environment: Environment name (dev/prod) which maps directly to bucket name
3434
"""
3535
self.bucket_name = environment
3636
self.endpoint_url = f"https://{account_id}.r2.cloudflarestorage.com"

backend/main.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import os
22
import logging
3-
from fastapi import UploadFile, HTTPException
3+
from fastapi import UploadFile, HTTPException, Form
44
import modal
55

6-
# Constants
7-
PINECONE_CHUNKS_INDEX = "chunks-index"
6+
# Pinecone index names per environment
7+
PINECONE_INDEX_MAP = {
8+
"dev": "chunks-index",
9+
"prod": "prod-chunks"
10+
}
811

912
# Configure logging
1013
logging.basicConfig(
@@ -27,8 +30,8 @@
2730
)
2831
)
2932

30-
# Environment: "dev" (default) or "prod" (set via ENV variable)
31-
env = os.environ.get("ENV", "dev")
33+
# Environment: "dev" (default) or "prod" (set via ENVIRONMENT variable)
34+
env = os.environ.get("ENVIRONMENT", "dev")
3235

3336
# Create Modal app
3437
app = modal.App(
@@ -82,14 +85,18 @@ def startup(self):
8285
raise ValueError("R2_SECRET_ACCESS_KEY not found in environment variables")
8386

8487
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
85-
if ENVIRONMENT not in ["dev", "test", "prod"]:
86-
raise ValueError(f"Invalid ENVIRONMENT value: {ENVIRONMENT}. Must be one of: dev, test, prod")
88+
if ENVIRONMENT not in ["dev", "prod"]:
89+
raise ValueError(f"Invalid ENVIRONMENT value: {ENVIRONMENT}. Must be one of: dev, prod")
8790
logger.info(f"Running in environment: {ENVIRONMENT}")
8891

92+
# Select Pinecone index based on environment
93+
pinecone_index = PINECONE_INDEX_MAP[ENVIRONMENT]
94+
logger.info(f"Using Pinecone index: {pinecone_index}")
95+
8996
# Instantiate classes
9097
self.preprocessor = Preprocessor(min_chunk_duration=1.0, max_chunk_duration=10.0, scene_threshold=13.0)
9198
self.video_embedder = VideoEmbedder()
92-
self.pinecone_connector = PineconeConnector(api_key=PINECONE_API_KEY, index_name=PINECONE_CHUNKS_INDEX)
99+
self.pinecone_connector = PineconeConnector(api_key=PINECONE_API_KEY, index_name=pinecone_index)
93100
self.job_store = JobStoreConnector(dict_name="clipabit-jobs")
94101

95102
self.r2_connector = R2Connector(
@@ -101,7 +108,7 @@ def startup(self):
101108

102109
self.searcher = Searcher(
103110
api_key=PINECONE_API_KEY,
104-
index_name=PINECONE_CHUNKS_INDEX,
111+
index_name=pinecone_index,
105112
r2_connector=self.r2_connector
106113
)
107114

@@ -110,16 +117,16 @@ def startup(self):
110117
print(f"[Container] Started at {self.start_time.isoformat()}")
111118

112119
@modal.method()
113-
async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
114-
logger.info(f"[Job {job_id}] Processing started: {filename} ({len(video_bytes)} bytes)")
120+
async def process_video(self, video_bytes: bytes, filename: str, job_id: str, namespace: str = ""):
121+
logger.info(f"[Job {job_id}] Processing started: {filename} ({len(video_bytes)} bytes) | namespace='{namespace}'")
115122

116123
try:
117124
# Upload original video to R2 bucket
118125
# TODO: do this in parallel with processing and provide url once done
119126
success, hashed_identifier = self.r2_connector.upload_video(
120127
video_data=video_bytes,
121128
filename=filename,
122-
# user_id="user1" # Specify user ID once we have user management
129+
namespace=namespace
123130
)
124131
if not success:
125132
raise Exception(f"Failed to upload video to R2 storage: {hashed_identifier}")
@@ -175,7 +182,7 @@ async def process_video(self, video_bytes: bytes, filename: str, job_id: str):
175182
self.pinecone_connector.upsert_chunk(
176183
chunk_id=chunk['chunk_id'],
177184
chunk_embedding=embedding.numpy(),
178-
namespace="",
185+
namespace=namespace,
179186
metadata=chunk['metadata']
180187
)
181188

@@ -244,12 +251,13 @@ async def status(self, job_id: str):
244251
return job_data
245252

246253
@modal.fastapi_endpoint(method="POST")
247-
async def upload(self, file: UploadFile = None):
254+
async def upload(self, file: UploadFile = None, namespace: str = Form("")):
248255
"""
249256
Handle video file upload and start background processing.
250257
251258
Args:
252259
file (UploadFile): The uploaded video file.
260+
namespace (str, optional): Namespace for Pinecone and R2 storage (default: "")
253261
254262
Returns:
255263
dict: Contains job_id, filename, content_type, size_bytes, status, and message.
@@ -264,11 +272,12 @@ async def upload(self, file: UploadFile = None):
264272
"filename": file.filename,
265273
"status": "processing",
266274
"size_bytes": file_size,
267-
"content_type": file.content_type
275+
"content_type": file.content_type,
276+
"namespace": namespace
268277
})
269278

270279
# Spawn background processing (non-blocking - returns immediately)
271-
self.process_video.spawn(contents, file.filename, job_id)
280+
self.process_video.spawn(contents, file.filename, job_id, namespace)
272281

273282
return {
274283
"job_id": job_id,
@@ -281,12 +290,13 @@ async def upload(self, file: UploadFile = None):
281290

282291

283292
@modal.fastapi_endpoint(method="GET")
284-
async def search(self, query: str):
293+
async def search(self, query: str, namespace: str = ""):
285294
"""
286295
Search endpoint - accepts a text query and returns semantic search results.
287296
288297
Args:
289298
- query (str): The search query string (required)
299+
- namespace (str, optional): Namespace for Pinecone search (default: "")
290300
- top_k: Not a request parameter; the number of results is currently fixed at 10
291301
292302
Returns: dict with 'query', 'results', and 'timing'.
@@ -300,16 +310,17 @@ async def search(self, query: str):
300310
raise HTTPException(status_code=400, detail="Missing 'query' parameter")
301311

302312
top_k = 10
303-
logger.info(f"[Search] Query: '{query}' | top_k={top_k}")
313+
logger.info(f"[Search] Query: '{query}' | namespace='{namespace}' | top_k={top_k}")
304314

305315
# Execute semantic search
306316
results = self.searcher.search(
307317
query=query,
308-
top_k=top_k
318+
top_k=top_k,
319+
namespace=namespace
309320
)
310-
321+
311322
t_done = time.perf_counter()
312-
323+
313324
# Log chunk-level results only
314325
logger.info(f"[Search] Found {len(results)} chunk-level results in {t_done - t_start:.3f}s")
315326

backend/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ dependencies = [
2020

2121
[project.scripts]
2222
dev = "cli:serve"
23-
deploy = "cli:deploy"
2423

2524
[build-system]
2625
requires = ["hatchling"]

backend/search/searcher.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,16 @@ def device(self) -> str:
6161
def search(
6262
self,
6363
query: str,
64-
top_k: int = 5
64+
top_k: int = 5,
65+
namespace: Optional[str] = None
6566
) -> List[Dict[str, Any]]:
6667
"""
6768
Search for semantically similar content.
6869
6970
Args:
7071
query: Natural language search query
7172
top_k: Number of results to return (default: 5)
73+
namespace: Namespace to search; if None (default), falls back to self.namespace
7274
7375
Returns:
7476
List of matches with scores and metadata, sorted by similarity
@@ -79,15 +81,17 @@ def search(
7981
print(f"Score: {result['score']}")
8082
print(f"Metadata: {result['metadata']}")
8183
"""
82-
logger.info(f"Searching for: '{query}' (top_k={top_k})")
83-
84+
# Use provided namespace or fall back to default
85+
search_namespace = namespace if namespace is not None else self.namespace
86+
logger.info(f"Searching for: '{query}' (top_k={top_k}, namespace='{search_namespace}')")
87+
8488
# Generate query embedding
8589
query_embedding = self.embedder.embed_text(query)
86-
90+
8791
# Search Pinecone with optional filters
8892
matches = self.connector.query_chunks(
8993
query_embedding=query_embedding,
90-
namespace=self.namespace,
94+
namespace=search_namespace,
9195
top_k=top_k
9296
)
9397

frontend/streamlit/app.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import requests
44
import streamlit as st
5+
from config import Config
56

67

78
# Page configuration
@@ -15,17 +16,18 @@
1516
if 'search_results' not in st.session_state:
1617
st.session_state.search_results = None
1718

18-
# API endpoints
19-
SEARCH_API_URL = "https://clipabit01--clipabit-server-search-dev.modal.run"
20-
UPLOAD_API_URL = "https://clipabit01--clipabit-server-upload-dev.modal.run"
21-
STATUS_API_URL = "https://clipabit01--clipabit-server-status-dev.modal.run"
22-
LIST_VIDEOS_API_URL = "https://clipabit01--clipabit-server-list-videos-dev.modal.run"
19+
# API endpoints from config
20+
SEARCH_API_URL = Config.SEARCH_API_URL
21+
UPLOAD_API_URL = Config.UPLOAD_API_URL
22+
STATUS_API_URL = Config.STATUS_API_URL
23+
LIST_VIDEOS_API_URL = Config.LIST_VIDEOS_API_URL
24+
NAMESPACE = Config.NAMESPACE
2325

2426

2527
def search_videos(query: str):
2628
"""Send search query to backend."""
2729
try:
28-
resp = requests.get(SEARCH_API_URL, params={"query": query}, timeout=30)
30+
resp = requests.get(SEARCH_API_URL, params={"query": query, "namespace": NAMESPACE}, timeout=30)
2931
if resp.status_code == 200:
3032
return resp.json()
3133
else:
@@ -38,7 +40,7 @@ def search_videos(query: str):
3840
def fetch_all_videos():
3941
"""Fetch all videos from the backend."""
4042
try:
41-
resp = requests.get(LIST_VIDEOS_API_URL, timeout=30)
43+
resp = requests.get(LIST_VIDEOS_API_URL, params={"namespace": NAMESPACE}, timeout=30)
4244
if resp.status_code == 200:
4345
data = resp.json()
4446
return data.get("videos", [])
@@ -50,7 +52,8 @@ def fetch_all_videos():
5052
def upload_file_to_backend(file_bytes: bytes, filename: str, content_type: str | None = None):
5153
"""Upload file to backend via multipart form-data."""
5254
files = {"file": (filename, io.BytesIO(file_bytes), content_type or "application/octet-stream")}
53-
resp = requests.post(UPLOAD_API_URL, files=files, timeout=300)
55+
data = {"namespace": NAMESPACE}
56+
resp = requests.post(UPLOAD_API_URL, files=files, data=data, timeout=300)
5457
return resp
5558

5659

frontend/streamlit/config.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Configuration module for ClipABit Streamlit frontend."""
2+
3+
import os
4+
5+
6+
class Config:
7+
"""Configuration class for environment-based settings."""
8+
9+
# Environment (defaults to "dev")
10+
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
11+
12+
# Validate environment
13+
if ENVIRONMENT not in ["dev", "prod"]:
14+
raise ValueError(f"Invalid ENVIRONMENT value: {ENVIRONMENT}. Must be one of: dev, prod")
15+
16+
# Modal app name (matches backend app name)
17+
APP_NAME = ENVIRONMENT
18+
19+
# API Endpoints - dynamically constructed based on environment
20+
# Pattern: https://clipabit01--{env}-server-{endpoint}-{env}.modal.run
21+
SEARCH_API_URL = f"https://clipabit01--{APP_NAME}-server-search-{APP_NAME}.modal.run"
22+
UPLOAD_API_URL = f"https://clipabit01--{APP_NAME}-server-upload-{APP_NAME}.modal.run"
23+
STATUS_API_URL = f"https://clipabit01--{APP_NAME}-server-status-{APP_NAME}.modal.run"
24+
LIST_VIDEOS_API_URL = f"https://clipabit01--{APP_NAME}-server-list-videos-{APP_NAME}.modal.run"
25+
26+
# Namespace for Pinecone and R2 (web-demo for public demo)
27+
NAMESPACE = "web-demo"
28+
29+
@classmethod
30+
def get_config(cls):
31+
"""Get configuration as a dictionary."""
32+
return {
33+
"environment": cls.ENVIRONMENT,
34+
"app_name": cls.APP_NAME,
35+
"search_api_url": cls.SEARCH_API_URL,
36+
"upload_api_url": cls.UPLOAD_API_URL,
37+
"status_api_url": cls.STATUS_API_URL,
38+
"list_videos_api_url": cls.LIST_VIDEOS_API_URL,
39+
"namespace": cls.NAMESPACE,
40+
}
41+
42+
@classmethod
43+
def print_config(cls):
44+
"""Print current configuration for debugging."""
45+
config = cls.get_config()
46+
print("Current Configuration:")
47+
for key, value in config.items():
48+
print(f" {key}: {value}")

0 commit comments

Comments
 (0)