Commit 01e3933

load models at docker image build instead of at container startup
1 parent: 1be01cf

7 files changed: +41 −4 lines
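The mechanism, in brief: Modal's Image.run_function executes a function during the image build and snapshots the resulting filesystem into the image, so the Hugging Face cache written by from_pretrained ships with the image and container startup becomes a disk read instead of a network download. A minimal standalone sketch of the pattern, assuming transformers and torch are installed; the app name and function are illustrative, not from this repo:

import modal

def download_model():
    # Runs once at image build time; weights land in ~/.cache/huggingface.
    from transformers import CLIPModel
    CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install("transformers", "torch")
    .run_function(download_model)  # snapshot the downloaded weights into the image
)

app = modal.App("clip-demo", image=image)

@app.function()
def embedding_dim() -> int:
    from transformers import CLIPModel
    # Loads from the baked-in cache; no download at container start.
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    return model.config.projection_dim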

backend/search/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 Search module for semantic search using CLIP embeddings and Pinecone.
 """
 
-from search.embedder import TextEmbedder
+from search.text_embedder import TextEmbedder
 from search.searcher import Searcher
 
 __all__ = ["TextEmbedder", "Searcher"]

backend/search/searcher.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
 
 from database.pinecone_connector import PineconeConnector
 from database.r2_connector import R2Connector
-from search.embedder import TextEmbedder
+from search.text_embedder import TextEmbedder
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

backend/services/processing.py

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ class ProcessingService:
     def startup(self):
         """Load CLIP model and initialize all connectors."""
         from preprocessing.preprocessor import Preprocessor
-        from embeddings.embedder import VideoEmbedder
+        from embeddings.video_embedder import VideoEmbedder
         from database.pinecone_connector import PineconeConnector
         from database.job_store_connector import JobStoreConnector
         from database.r2_connector import R2Connector
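For context on the rename, a sketch of what embeddings/video_embedder.py plausibly exposes; only the VideoEmbedder name and its use of the full CLIP model are confirmed by this commit, so the class body below is an assumption:

# Hypothetical sketch of embeddings/video_embedder.py, not the repo's actual code.
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

class VideoEmbedder:
    """Embeds video frames with the full CLIP model (vision + text towers)."""

    def __init__(self, model_name: str = "openai/clip-vit-base-patch32"):
        # Weights are already in the image's HF cache, so this loads from disk.
        self.model = CLIPModel.from_pretrained(model_name).eval()
        self.processor = CLIPProcessor.from_pretrained(model_name, use_fast=True)

    def embed_frames(self, frames: list[Image.Image]) -> torch.Tensor:
        inputs = self.processor(images=frames, return_tensors="pt")
        with torch.no_grad():
            feats = self.model.get_image_features(**inputs)
        # L2-normalize so cosine similarity reduces to a dot product in Pinecone.
        return feats / feats.norm(dim=-1, keepdim=True)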

backend/services/search.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ def startup(self):
         """Load CLIP text encoder and initialize connectors."""
         from database.pinecone_connector import PineconeConnector
         from database.r2_connector import R2Connector
-        from search.embedder import TextEmbedder
+        from search.text_embedder import TextEmbedder
 
         env = get_environment()
         logger.info(f"[{self.__class__.__name__}] Starting up in '{env}' environment")
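Likewise for the search side, a sketch of what search/text_embedder.py plausibly contains; the diff confirms only the module path, the TextEmbedder name, and the text-only classes used in _download_clip_text_model, so the body is an assumption:

# Hypothetical sketch of search/text_embedder.py, not the repo's actual code.
import torch
from transformers import CLIPTextModelWithProjection, CLIPTokenizer

class TextEmbedder:
    """Embeds search queries with CLIP's text tower only (~150MB vs ~350MB)."""

    def __init__(self, model_name: str = "openai/clip-vit-base-patch32"):
        self.tokenizer = CLIPTokenizer.from_pretrained(model_name)
        self.model = CLIPTextModelWithProjection.from_pretrained(model_name).eval()

    def embed(self, query: str) -> torch.Tensor:
        inputs = self.tokenizer([query], padding=True, return_tensors="pt")
        with torch.no_grad():
            out = self.model(**inputs)
        # text_embeds is already projected into the shared CLIP embedding space.
        emb = out.text_embeds[0]
        return emb / emb.norm()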

backend/shared/images.py

Lines changed: 37 additions & 0 deletions

@@ -9,9 +9,23 @@
 
 import modal
 
+def _download_all_clip_models():
+    """Pre-download all CLIP models at image build time."""
+    from transformers import CLIPModel, CLIPProcessor, CLIPTextModelWithProjection, CLIPTokenizer
+    model_name = "openai/clip-vit-base-patch32"
+    # Full model for video processing
+    CLIPModel.from_pretrained(model_name)
+    CLIPProcessor.from_pretrained(model_name, use_fast=True)
+    # Text-only model for search
+    CLIPTokenizer.from_pretrained(model_name)
+    CLIPTextModelWithProjection.from_pretrained(model_name)
+
+
 def get_dev_image() -> modal.Image:
     """
     Create the Modal image for the dev app.
+
+    Pre-downloads all models at build time to eliminate cold start downloads.
     """
     return (
         modal.Image.debian_slim(python_version="3.12")
@@ -29,6 +43,7 @@ def get_dev_image() -> modal.Image:
             "scenedetect",
             "pillow",
         )
+        .run_function(_download_all_clip_models)
         .add_local_python_source(
             "api",
             "database",
@@ -66,12 +81,22 @@ def get_server_image() -> modal.Image:
         )
     )
 
+def _download_clip_text_model():
+    """Pre-download CLIP text encoder at image build time."""
+    from transformers import CLIPTextModelWithProjection, CLIPTokenizer
+    model_name = "openai/clip-vit-base-patch32"
+    CLIPTokenizer.from_pretrained(model_name)
+    CLIPTextModelWithProjection.from_pretrained(model_name)
+
+
 def get_search_image() -> modal.Image:
     """
     Create the Modal image for the Search app.
 
     Medium dependencies - includes CLIP text encoder only.
     The text encoder (~150MB) is much lighter than the full CLIP model (~350MB).
+
+    Pre-downloads the model at build time to eliminate cold start downloads.
     """
     return (
         modal.Image.debian_slim(python_version="3.12")
@@ -83,6 +108,7 @@ def get_search_image() -> modal.Image:
             "boto3",
             "numpy",
         )
+        .run_function(_download_clip_text_model)
         .add_local_python_source(
             "database",
             "search",
@@ -92,12 +118,22 @@ def get_search_image() -> modal.Image:
     )
 
 
+def _download_clip_full_model():
+    """Pre-download full CLIP model (vision + text) at image build time."""
+    from transformers import CLIPModel, CLIPProcessor
+    model_name = "openai/clip-vit-base-patch32"
+    CLIPModel.from_pretrained(model_name)
+    CLIPProcessor.from_pretrained(model_name, use_fast=True)
+
+
 def get_processing_image() -> modal.Image:
     """
     Create the Modal image for the Processing app.
 
     Heavy dependencies for video processing pipeline.
     Includes: ffmpeg, opencv, scenedetect, full CLIP model, etc.
+
+    Pre-downloads the model at build time to eliminate cold start downloads.
     """
     return (
         modal.Image.debian_slim(python_version="3.12")
@@ -113,6 +149,7 @@ def get_processing_image() -> modal.Image:
             "pinecone",
             "boto3",
         )
+        .run_function(_download_clip_full_model)
         .add_local_python_source(
             "database",
             "preprocessing",
