-
Notifications
You must be signed in to change notification settings - Fork 205
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Adrian Cole <[email protected]>
- Loading branch information
1 parent
4182f10
commit 76475fa
Showing
18 changed files
with
359 additions
and
512 deletions.
There are no files selected for viewing
8 changes: 8 additions & 0 deletions
8
supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Ignore everything | ||
** | ||
|
||
# Allow specific files and directories | ||
!requirements.txt | ||
!data/ | ||
!src/ | ||
!stages/ |
18 changes: 11 additions & 7 deletions
18
supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,17 @@ | ||
# Elasticsearch Configuration | ||
ELASTIC_API_KEY=your_api_key_here | ||
ELASTICSEARCH_ENDPOINT=your_elastic_endpoint | ||
# Make a copy of this file with the name .env and assign values to variables | ||
|
||
# How you connect to Elasticsearch: change details to your instance | ||
ELASTICSEARCH_URL= | ||
ELASTICSEARCH_API_KEY= | ||
# If not using API key, uncomment these and fill them in: | ||
# ELASTICSEARCH_USER=elastic | ||
# ELASTICSEARCH_PASSWORD=elastic | ||
|
||
# OpenAI Configuration | ||
OPENAI_API_KEY=your_openai_api_key_here | ||
OPENAI_API_KEY= | ||
|
||
# Model Configuration | ||
MODEL_PATH=~/.cache/torch/checkpoints/imagebind_huge.pth | ||
|
||
# Optional Configuration | ||
#LOG_LEVEL=INFO | ||
#DEBUG=False | ||
# LOG_LEVEL=INFO | ||
# DEBUG=False |
50 changes: 33 additions & 17 deletions
50
supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,36 @@ | ||
FROM ubuntu:24.04 | ||
# Use non-slim image due to OS dependencies of python packages. This gives us | ||
# git, build-essential, libglib2 (opencv) and gomp (torchaudio). | ||
FROM python:3.12 | ||
|
||
# Install necessary packages | ||
RUN apt update && apt install -y --no-install-recommends \ | ||
python3 \ | ||
python3-pip \ | ||
python3-venv \ | ||
g++ \ | ||
gcc \ | ||
python3.12-dev | ||
COPY /requirements.txt . | ||
|
||
# Create and activate a virtual environment | ||
RUN python3 -m venv /opt/venv | ||
ENV PATH="/opt/venv/bin:$PATH" | ||
# Our python requirements have some OS dependencies beyond the base layer: | ||
# | ||
# * imagebind pulls in cartopy which has OS dependencies on geos and proj | ||
# * opencv has a runtime OS dependency on libgl1-mesa-glx | ||
# | ||
# The dev dependencies are installed temporarily to compile the wheels. | ||
# We leave only the runtime dependencies, to keep the image smaller. | ||
RUN apt-get update && \ | ||
# install build and runtime dependencies | ||
apt-get install -y --no-install-recommends \ | ||
libgeos-dev \ | ||
libproj-dev \ | ||
libgeos-c1v5 \ | ||
libproj25 \ | ||
libgl1-mesa-glx && \ | ||
# Install everything except xformers first. --no-cache-dir keeps pip's | ||
# download cache (large for torch wheels) out of this image layer. | ||
grep -v "\bxformers\b" requirements.txt > /tmp/r.txt && pip install --no-cache-dir -r /tmp/r.txt && \ | ||
# Now, install xformers, as it should be able to see torch now | ||
grep "\bxformers\b" requirements.txt > /tmp/r.txt && pip install --no-cache-dir -r /tmp/r.txt && \ | ||
# remove the temporary requirements file and the build dependencies in the | ||
# same layer that created them, so they never persist in the image | ||
rm -f /tmp/r.txt && \ | ||
apt-get purge -y libgeos-dev libproj-dev && \ | ||
apt-get autoremove -y && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
WORKDIR /app | ||
RUN mkdir -p ./data ./src ./stages | ||
COPY ./data ./data | ||
COPY ./src ./src | ||
COPY ./stages ./stages | ||
|
||
# Install Python packages in the virtual environment | ||
RUN pip install --upgrade pip | ||
RUN pip install torch | ||
RUN pip install wheel setuptools | ||
RUN pip install transformers xformers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
name: gotham-city-crime-analysis | ||
|
||
services: | ||
# Stage 1: runs stages/01-stage/files_check.py once, using the image built | ||
# from this directory and the variables in .env. | ||
verify-file-structure: | ||
build: | ||
context: . | ||
container_name: verify-file-structure | ||
restart: 'no' # no need to re-verify file structure | ||
env_file: | ||
- .env | ||
command: python stages/01-stage/files_check.py | ||
extra_hosts: # send localhost traffic to the docker host, e.g. your laptop | ||
- "localhost:host-gateway" | ||
|
||
# Stage 2: runs only after verify-file-structure has completed successfully. | ||
generate-embeddings: | ||
depends_on: | ||
verify-file-structure: | ||
condition: service_completed_successfully | ||
build: | ||
context: . | ||
container_name: generate-embeddings | ||
restart: 'no' # no need to re-generate embeddings | ||
env_file: | ||
- .env | ||
command: python stages/02-stage/test_embedding_generation.py | ||
extra_hosts: # send localhost traffic to the docker host, e.g. your laptop | ||
- "localhost:host-gateway" | ||
volumes: | ||
# Must match the directory of MODEL_PATH from .env.template | ||
# (~/.cache/torch/checkpoints, assuming the container runs as root) so the | ||
# downloaded checkpoint actually lands in the named volume. | ||
- torch-checkpoints:/root/.cache/torch/checkpoints/ | ||
|
||
# Stage 3: runs only after generate-embeddings has completed successfully. | ||
index-content: | ||
depends_on: | ||
generate-embeddings: | ||
condition: service_completed_successfully | ||
build: | ||
context: . | ||
container_name: index-content | ||
restart: 'no' # no need to re-index content | ||
env_file: | ||
- .env | ||
command: python stages/03-stage/index_all_modalities.py | ||
extra_hosts: # send localhost traffic to the docker host, e.g. your laptop | ||
- "localhost:host-gateway" | ||
|
||
# Stage 4: runs only after index-content has completed successfully. | ||
search-and-analyze: | ||
depends_on: | ||
index-content: | ||
condition: service_completed_successfully | ||
build: | ||
context: . | ||
container_name: search-and-analyze | ||
restart: 'no' # no need to re-run the analysis | ||
env_file: | ||
- .env | ||
command: python stages/04-stage/rag_crime_analyze.py | ||
extra_hosts: # send localhost traffic to the docker host, e.g. your laptop | ||
- "localhost:host-gateway" | ||
|
||
volumes: | ||
# Avoid re-downloading a >4GB model checkpoint | ||
torch-checkpoints: |
15 changes: 0 additions & 15 deletions
15
supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.