From 76475faa222738314bce4e33110bc3b03c515d08 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Wed, 26 Feb 2025 20:16:05 +0800 Subject: [PATCH] polish-and-docker Signed-off-by: Adrian Cole --- .../.dockerignore | 8 + .../.env.template | 18 +- .../Dockerfile | 50 ++-- .../README.md | 67 ++--- .../docker-compose.yml | 61 ++++ .../requirements.in | 15 - .../requirements.txt | 274 +----------------- .../src/elastic_manager.py | 61 ++-- .../src/embedding_generator.py | 74 ++--- .../src/llm_analyzer.py | 47 +-- .../stages/01-stage/files_check.py | 6 +- .../02-stage/test_embedding_generation.py | 10 +- .../stages/03-stage/index_all_modalities.py | 51 +++- .../stages/03-stage/search_by_audio.py | 22 +- .../stages/03-stage/search_by_depth.py | 25 +- .../stages/03-stage/search_by_image.py | 26 +- .../stages/03-stage/search_by_text.py | 22 +- .../stages/04-stage/rag_crime_analyze.py | 34 ++- 18 files changed, 359 insertions(+), 512 deletions(-) create mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore create mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml delete mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore new file mode 100644 index 00000000..c93b7331 --- /dev/null +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore @@ -0,0 +1,8 @@ +# Ignore everything +** + +# Allow specific files and directories +!requirements.txt +!data/ +!src/ +!stages/ diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template index 7d9c72a1..383c870e 100644 --- 
a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template @@ -1,13 +1,17 @@ -# Elasticsearch Configuration -ELASTIC_API_KEY=your_api_key_here -ELASTICSEARCH_ENDPOINT=your_elastic_endpoint +# Make a copy of this file with the name .env and assign values to variables + +# How you connect to Elasticsearch: change details to your instance +ELASTICSEARCH_URL= +ELASTICSEARCH_API_KEY= +# If not using API key, uncomment these and fill them in: +# ELASTICSEARCH_USER=elastic +# ELASTICSEARCH_PASSWORD=elastic # OpenAI Configuration -OPENAI_API_KEY=your_openai_api_key_here +OPENAI_API_KEY= # Model Configuration -MODEL_PATH=~/.cache/torch/checkpoints/imagebind_huge.pth # Optional Configuration -#LOG_LEVEL=INFO -#DEBUG=False \ No newline at end of file +# LOG_LEVEL=INFO +# DEBUG=False \ No newline at end of file diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile index efeb328c..2b22ab0d 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile @@ -1,20 +1,36 @@ -FROM ubuntu:24.04 +# Use non-slim image due to OS dependencies of python packages. This gives us +# git, build-essential, libglib2 (opencv) and gomp (torchaudio). +FROM python:3.12 -# Install necessary packages -RUN apt update && apt install -y --no-install-recommends \ - python3 \ - python3-pip \ - python3-venv \ - g++ \ - gcc \ - python3.12-dev +COPY /requirements.txt . 
-# Create and activate a virtual environment -RUN python3 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# Our python requirements have some OS dependencies beyond the base layer: +# +# * imagebind pulls in cartopy which has OS dependencies on geos and proj +# * opencv has a runtime OS dependency on libgl1-mesa-glx +# +# The dev dependencies are installed temporarily to compile the wheels. +# We leave only the runtime dependencies, to keep the image smaller. +RUN apt-get update && \ + # install build and runtime dependencies + apt-get install -y --no-install-recommends \ + libgeos-dev \ + libproj-dev \ + libgeos-c1v5 \ + libproj25 \ + libgl1-mesa-glx && \ + # Install everything except xformers first + grep -v "\bxformers\b" requirements.txt > /tmp/r.txt && pip install -r /tmp/r.txt && \ + # Now, install xformers, as it should be able to see torch now + grep "\bxformers\b" requirements.txt > /tmp/r.txt && pip install -r /tmp/r.txt && \ + # remove build dependencies + apt-get purge -y libgeos-dev libproj-dev && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app +RUN mkdir -p ./data ./src ./stages +COPY ./data ./data +COPY ./src ./src +COPY ./stages ./stages -# Install Python packages in the virtual environment -RUN pip install --upgrade pip -RUN pip install torch -RUN pip install wheel setuptools -RUN pip install transformers xformers diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md index d22e0232..a5934a47 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md @@ -11,67 +11,34 @@ The pipeline demonstrates how to: ## Prerequisites -- Python 3.10+ +- A Docker runtime with 8GB+ free RAM + - GPU is optional, but recommended - Elasticsearch cluster (cloud or local) - OpenAI 
API key - Setup an OpenAI account and create a [secret key](https://platform.openai.com/docs/quickstart) -- 8GB+ RAM -- GPU (optional but recommended) ## Quick Start -1. **Setup Environment** -```bash -rm -rf .venv requirements.txt -python3 -m venv .venv -source .venv/bin/activate -pip install pip-tools -# Recreate requirements.txt -pip-compile -# Install main dependencies -pip install -r requirements.txt - - - -python3 -m venv .venv -source .venv/bin/activate -pip install "python-dotenv[cli]" -pip install -r requirements-torch.txt -pip install -r requirements.txt - -# Make sure you have pytorch installed and Python 3.10+ -pip install torch torchvision torchaudio - -# Create and activate virtual environment -python -m venv env_mmrag -source env_mmrag/bin/activate # Unix/MacOS -# or -.\env_mmrag\Scripts\activate # Windows - -# Install dependencies -pip install -r requirements.txt -``` +This example runs four stages as docker compose services: -2. **Configure Credentials** -Create a `.env` file: -```env -ELASTICSEARCH_ENDPOINT="your-elasticsearch-endpoint" -ELASTIC_API_KEY="your-elastic-api-key" -OPENAI_API_KEY="your-openai-api-key" +```mermaid +graph TD + verify-file-structure --> generate-embeddings + generate-embeddings --> index-content + index-content --> search-and-analyze ``` -3. **Run the Demo** -```bash -# Verify file structure -python stages/01-stage/files_check.py +First, copy [.env.template](.env.template) to `.env` and fill in the values noted inside. -# Generate embeddings -python stages/02-stage/test_embedding_generation.py +Now, run the command below to start the pipeline: +```bash +docker compose run --build --rm search-and-analyze +``` -# Index content -python stages/03-stage/index_all_modalities.py +The first time takes a while to build the image and download ImageBind weights. 
-# Search and analyze -python stages/04-stage/rag_crime_analyze.py +If you want to re-run just one stage, add `--no-deps` like this: +```bash +docker compose run --no-deps --build --rm search-and-analyze ``` ## Project Structure diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml new file mode 100644 index 00000000..024ccd7b --- /dev/null +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml @@ -0,0 +1,61 @@ +name: gotham-city-crime-analysis + +services: + verify-file-structure: + build: + context: . + container_name: verify-file-structure + restart: 'no' # no need to re-verify file structure + env_file: + - .env + command: python stages/01-stage/files_check.py + extra_hosts: # send localhost traffic to the docker host, e.g. your laptop + - "localhost:host-gateway" + + generate-embeddings: + depends_on: + verify-file-structure: + condition: service_completed_successfully + build: + context: . + container_name: generate-embeddings + restart: 'no' # no need to re-generate embeddings + env_file: + - .env + command: python stages/02-stage/test_embedding_generation.py + extra_hosts: # send localhost traffic to the docker host, e.g. your laptop + - "localhost:host-gateway" + volumes: + - torch-checkpoints:/root/.cache/torch/checkpoints/ + + index-content: + depends_on: + generate-embeddings: + condition: service_completed_successfully + build: + context: . + container_name: index-content + restart: 'no' # no need to re-index content + env_file: + - .env + command: python stages/03-stage/index_all_modalities.py + extra_hosts: # send localhost traffic to the docker host, e.g. your laptop + - "localhost:host-gateway" + + search-and-analyze: + depends_on: + index-content: + condition: service_completed_successfully + build: + context: . 
+ container_name: search-and-analyze + restart: 'no' # no need to re-run search and analysis + env_file: + - .env + command: python stages/04-stage/rag_crime_analyze.py + extra_hosts: # send localhost traffic to the docker host, e.g. your laptop + - "localhost:host-gateway" + +volumes: + # Avoid re-downloading a >4GB model checkpoint + torch-checkpoints: diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in deleted file mode 100644 index 74d6bb64..00000000 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in +++ /dev/null @@ -1,15 +0,0 @@ -elasticsearch -torch -torchvision -torchaudio -imagebind @ git+https://github.com/hkchengrex/ImageBind.git -openai -python-dotenv -numpy -pillow -opencv-python -librosa -matplotlib -wheel -setuptools -xformers \ No newline at end of file diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt index 1b701e27..9517fa76 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt @@ -1,261 +1,15 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile -# -annotated-types==0.7.0 - # via pydantic -anyio==4.8.0 - # via - # httpx - # openai -audioread==3.0.1 - # via librosa -av==14.1.0 - # via pytorchvideo -cartopy==0.24.1 - # via imagebind -certifi==2025.1.31 - # via - # elastic-transport - # httpcore - # httpx - # pyproj - # requests -cffi==1.17.1 - # via soundfile -charset-normalizer==3.4.1 - # via requests -contourpy==1.3.1 - # via matplotlib -cycler==0.12.1 - # via matplotlib -decorator==5.2.1 - # via librosa 
-distro==1.9.0 - # via openai -einops==0.8.1 - # via imagebind -elastic-transport==8.17.0 - # via elasticsearch -elasticsearch==8.17.1 - # via -r requirements.in -filelock==3.17.0 - # via - # huggingface-hub - # torch -fonttools==4.56.0 - # via matplotlib -fsspec==2025.2.0 - # via - # huggingface-hub - # torch -ftfy==6.3.1 - # via imagebind -fvcore==0.1.5.post20221221 - # via - # imagebind - # pytorchvideo -h11==0.14.0 - # via httpcore -httpcore==1.0.7 - # via httpx -httpx==0.28.1 - # via openai -huggingface-hub==0.29.1 - # via timm -idna==3.10 - # via - # anyio - # httpx - # requests +elasticsearch~=8.17.1 +torch~=2.6.0 +torchvision~=0.21.0 +torchaudio~=2.6.0 imagebind @ git+https://github.com/hkchengrex/ImageBind.git - # via -r requirements.in -iopath==0.1.10 - # via - # fvcore - # imagebind - # pytorchvideo -jinja2==3.1.5 - # via torch -jiter==0.8.2 - # via openai -joblib==1.4.2 - # via - # librosa - # scikit-learn -kiwisolver==1.4.8 - # via matplotlib -lazy-loader==0.4 - # via librosa -librosa==0.10.2.post1 - # via -r requirements.in -llvmlite==0.44.0 - # via numba -markupsafe==3.0.2 - # via jinja2 -matplotlib==3.10.0 - # via - # -r requirements.in - # cartopy - # imagebind -mpmath==1.3.0 - # via sympy -msgpack==1.1.0 - # via librosa -networkx==3.4.2 - # via - # pytorchvideo - # torch -numba==0.61.0 - # via librosa -numpy==2.1.3 - # via - # -r requirements.in - # cartopy - # contourpy - # fvcore - # imagebind - # librosa - # matplotlib - # numba - # opencv-python - # scikit-learn - # scipy - # shapely - # soundfile - # soxr - # torchvision -openai==1.64.0 - # via -r requirements.in -opencv-python==4.11.0.86 - # via -r requirements.in -packaging==24.2 - # via - # cartopy - # huggingface-hub - # lazy-loader - # matplotlib - # pooch -parameterized==0.9.0 - # via pytorchvideo -pillow==11.1.0 - # via - # -r requirements.in - # fvcore - # matplotlib - # torchvision -platformdirs==4.3.6 - # via pooch -pooch==1.8.2 - # via librosa -portalocker==3.1.1 - # via iopath 
-pycparser==2.22 - # via cffi -pydantic==2.10.6 - # via openai -pydantic-core==2.27.2 - # via pydantic -pyparsing==3.2.1 - # via matplotlib -pyproj==3.7.1 - # via cartopy -pyshp==2.3.1 - # via cartopy -python-dateutil==2.9.0.post0 - # via matplotlib -python-dotenv==1.0.1 - # via -r requirements.in -pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc - # via imagebind -pyyaml==6.0.2 - # via - # fvcore - # huggingface-hub - # timm - # yacs -regex==2024.11.6 - # via imagebind -requests==2.32.3 - # via - # huggingface-hub - # pooch -safetensors==0.5.2 - # via timm -scikit-learn==1.6.1 - # via librosa -scipy==1.15.2 - # via - # librosa - # scikit-learn -shapely==2.0.7 - # via cartopy -six==1.17.0 - # via python-dateutil -sniffio==1.3.1 - # via - # anyio - # openai -soundfile==0.13.1 - # via librosa -soxr==0.5.0.post1 - # via librosa -sympy==1.13.1 - # via torch -tabulate==0.9.0 - # via fvcore -termcolor==2.5.0 - # via fvcore -threadpoolctl==3.5.0 - # via scikit-learn -timm==1.0.15 - # via imagebind -torch==2.6.0 - # via - # -r requirements.in - # imagebind - # timm - # torchaudio - # torchvision -torchaudio==2.6.0 - # via - # -r requirements.in - # imagebind -torchvision==0.21.0 - # via - # -r requirements.in - # imagebind - # timm -tqdm==4.67.1 - # via - # fvcore - # huggingface-hub - # iopath - # openai -types-regex==2024.11.6.20241221 - # via imagebind -typing-extensions==4.12.2 - # via - # anyio - # huggingface-hub - # iopath - # librosa - # openai - # pydantic - # pydantic-core - # torch -urllib3==2.3.0 - # via - # elastic-transport - # requests -wcwidth==0.2.13 - # via ftfy -wheel==0.45.1 - # via -r requirements.in -yacs==0.1.8 - # via fvcore - -# The following packages are considered to be unsafe in a requirements file: -# setuptools +openai~=1.64.0 +python-dotenv~=1.0.1 +numpy~=2.1.3 +pillow~=11.1.0 +opencv-python~=4.11.0 +librosa~=0.10.2 +matplotlib~=3.10.0 +wheel~=0.45.1 +setuptools +xformers~=0.0.29 
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py index f7fbcd6c..df5597e7 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py @@ -4,15 +4,16 @@ from dotenv import load_dotenv import numpy as np + class ElasticsearchManager: """Manages multimodal operations in Elasticsearch""" - + def __init__(self): load_dotenv() # Load variables from .env self.es = self._connect_elastic() self.index_name = "multimodal_content" self._setup_index() - + def _connect_elastic(self): """Connects to Elasticsearch""" ELASTICSEARCH_URL = os.getenv("ELASTICSEARCH_URL") @@ -33,7 +34,7 @@ def _connect_elastic(self): raise ValueError( "Please provide either ELASTICSEARCH_USER or ELASTICSEARCH_API_KEY" ) - + def _setup_index(self): """Sets up the index if it doesn't exist""" if not self.es.indices.exists(index=self.index_name): @@ -44,33 +45,45 @@ def _setup_index(self): "type": "dense_vector", "dims": 1024, "index": True, - "similarity": "cosine" + "similarity": "cosine", }, "modality": {"type": "keyword"}, "content": {"type": "binary"}, "description": {"type": "text"}, "metadata": {"type": "object"}, - "content_path": {"type": "text"} + "content_path": {"type": "text"}, } } } self.es.indices.create(index=self.index_name, body=mapping) - - def index_content(self, embedding, modality, content=None, description="", metadata=None, content_path=None): + + def index_content( + self, + embedding, + modality, + content=None, + description="", + metadata=None, + content_path=None, + ): """Indexes multimodal content""" doc = { "embedding": embedding.tolist(), "modality": modality, "description": description, "metadata": metadata or {}, - "content_path": content_path + "content_path": 
content_path, } - + if content: - doc["content"] = base64.b64encode(content).decode() if isinstance(content, bytes) else content - + doc["content"] = ( + base64.b64encode(content).decode() + if isinstance(content, bytes) + else content + ) + return self.es.index(index=self.index_name, document=doc) - + def search_similar(self, query_embedding, modality=None, k=5): """Searches for similar contents""" query = { @@ -79,23 +92,19 @@ def search_similar(self, query_embedding, modality=None, k=5): "query_vector": query_embedding.tolist(), "k": k, "num_candidates": 100, - "filter": [{"term": {"modality": modality}}] if modality else [] + "filter": [{"term": {"modality": modality}}] if modality else [], } } - + try: - response = self.es.search( - index=self.index_name, - query=query, - size=k - ) - + response = self.es.search(index=self.index_name, query=query, size=k) + # Return both source data and score for each hit - return [{ - **hit["_source"], - "score": hit["_score"] - } for hit in response["hits"]["hits"]] - + return [ + {**hit["_source"], "score": hit["_score"]} + for hit in response["hits"]["hits"] + ] + except Exception as e: print(f"Error: processing search_evidence: {str(e)}") - return "Error generating search evidence" \ No newline at end of file + return "Error generating search evidence" diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py index 8e3e0d3f..9d1d337d 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py @@ -16,87 +16,93 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + class EmbeddingGenerator: """Generates multimodal embeddings using ImageBind""" - + def __init__(self, device="cpu"): 
self.device = device self.model = self._load_model() - + def _load_model(self): """Initialize and test the ImageBind model.""" - checkpoint_path = "~/.cache/torch/checkpoints/imagebind_huge.pth" - os.makedirs(os.path.expanduser("~/.cache/torch/checkpoints"), exist_ok=True) - if not os.path.exists(os.path.expanduser(checkpoint_path)): + checkpoint_path = os.path.expanduser( + "~/.cache/torch/checkpoints/imagebind_huge.pth" + ) + logger.info(f"Using cache {checkpoint_path}") + os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True) + + if not os.path.exists(checkpoint_path): print("Downloading ImageBind weights...") download_url_to_file( "https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth", - os.path.expanduser(checkpoint_path) + checkpoint_path, ) - + try: - checkpoint_path = os.path.expanduser("~/.cache/torch/checkpoints/imagebind_huge.pth") - # Check if file exists if not os.path.exists(checkpoint_path): raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}") - + model = imagebind_model.imagebind_huge(pretrained=False) model.load_state_dict(torch.load(checkpoint_path)) model.eval().to(self.device) - + # Quick test with empty text input logger.info("Testing model with sample input...") test_input = data.load_and_transform_text([""], self.device) with torch.no_grad(): _ = model({"text": test_input}) - + logger.info("šŸ¤– ImageBind model initialized successfully") return model except Exception as e: logger.error(f"šŸšØ Model initialization failed: {str(e)}") raise - + def generate_embedding(self, input_data, modality): """Generates embedding for different modalities""" processors = { "vision": lambda x: data.load_and_transform_vision_data(x, self.device), "audio": lambda x: data.load_and_transform_audio_data(x, self.device), "text": lambda x: data.load_and_transform_text(x, self.device), - "depth": self.process_depth + "depth": self.process_depth, } - + try: # Input type verification if not isinstance(input_data, list): - raise 
ValueError(f"Input data must be a list. Received: {type(input_data)}") - + raise ValueError( + f"Input data must be a list. Received: {type(input_data)}" + ) + # Convert input data to a tensor format that the model can process - # For images: [batch_size, channels, height, width] - # For audio: [batch_size, channels, time] + # For images: [batch_size, channels, height, width] + # For audio: [batch_size, channels, time] # For text: [batch_size, sequence_length] inputs = {modality: processors[modality](input_data)} with torch.no_grad(): embedding = self.model(inputs)[modality] return embedding.squeeze(0).cpu().numpy() except Exception as e: - logger.error(f"Error generating {modality} embedding: {str(e)}", exc_info=True) + logger.error( + f"Error generating {modality} embedding: {str(e)}", exc_info=True + ) raise - def process_vision(self, image_path): """Processes image""" return data.load_and_transform_vision_data([image_path], self.device) - + def process_audio(self, audio_path): """Processes audio""" return data.load_and_transform_audio_data([audio_path], self.device) - + def process_text(self, text): """Processes text""" return data.load_and_transform_text([text], self.device) - + def process_depth(self, depth_paths, device="cpu"): """Custom processing for depth maps""" try: @@ -104,17 +110,19 @@ def process_depth(self, depth_paths, device="cpu"): for path in depth_paths: if not os.path.exists(path): raise FileNotFoundError(f"Depth map file not found: {path}") - + # Load and transform depth_images = [Image.open(path).convert("L") for path in depth_paths] - - transform = transforms.Compose([ - transforms.Resize((224, 224)), - transforms.ToTensor(), - ]) - + + transform = transforms.Compose( + [ + transforms.Resize((224, 224)), + transforms.ToTensor(), + ] + ) + return torch.stack([transform(img) for img in depth_images]).to(device) - + except Exception as e: logger.error(f"šŸšØ - Error processing depth map: {str(e)}") - raise \ No newline at end of file + raise 
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py index 1d2a11c5..13cfc174 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py @@ -6,17 +6,18 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + class LLMAnalyzer: """Evidence analyzer using GPT-4""" - + def __init__(self): load_dotenv() self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) - + def analyze_evidence(self, evidence_results): """ Analyzes multimodal search results and generates a report - + Args: evidence_results: Dict with results by modality { @@ -63,40 +64,42 @@ def analyze_evidence(self, evidence_results): messages=[ { "role": "system", - "content": "You are a forensic detective specialized in multimodal evidence analysis." 
+ "content": "You are a forensic detective specialized in multimodal evidence analysis.", }, - {"role": "user", "content": prompt} + {"role": "user", "content": prompt}, ], temperature=0.2, - max_tokens=1000 + max_tokens=1000, ) - + report = response.choices[0].message.content logger.info("\nšŸ“‹ Forensic Report Generated:") logger.info("=" * 50) logger.info(report) logger.info("=" * 50) - + return report - + except Exception as e: logger.error(f"Error generating report: {str(e)}") return None - + def _format_evidence(self, evidence_results): """Formats evidence for the prompt""" formatted = [] - + for modality, results in evidence_results.items(): formatted.append(f"\n{modality.upper()}:") for i, result in enumerate(results, 1): - description = result.get('description', 'No description') - similarity = result.get('score', 0) + description = result.get("description", "No description") + similarity = result.get("score", 0) formatted.append(f"{i}. {description} (Similarity: {similarity:.2f})") - + return "\n".join(formatted) - def analyze_cross_modal_connections(self, results_a, modality_a, results_b, modality_b): + def analyze_cross_modal_connections( + self, results_a, modality_a, results_b, modality_b + ): """Analyzes specific connections between two different modalities""" prompt = f"""Analyze the relationship between the following evidence from different modalities: @@ -120,20 +123,20 @@ def analyze_cross_modal_connections(self, results_a, modality_a, results_b, moda messages=[ { "role": "system", - "content": "You are an expert in forensic analysis of multimodal evidence." 
+ "content": "You are an expert in forensic analysis of multimodal evidence.", }, - {"role": "user", "content": prompt} + {"role": "user", "content": prompt}, ], temperature=0.7, - max_tokens=500 + max_tokens=500, ) - + analysis = response.choices[0].message.content logger.info(f"\nšŸ” Cross-Modal Analysis ({modality_a} x {modality_b}):") logger.info(analysis) - + return analysis - + except Exception as e: logger.error(f"Error: in cross-modal analysis: {str(e)}") - return None \ No newline at end of file + return None diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py index 875727e8..6c8dfe48 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py @@ -5,10 +5,10 @@ # List of expected files evidences = { - "images": ["crime_scene1.jpg","crime_scene1.jpg", "joker_alley.jpg"], + "images": ["crime_scene1.jpg", "crime_scene1.jpg", "joker_alley.jpg"], "audios": ["joker_laugh.wav"], "texts": ["riddle.txt", "note2.txt"], - "depths": ["depth_suspect.png"] + "depths": ["depth_suspect.png"], } # Create directories if they don't exist @@ -21,4 +21,4 @@ if not os.path.exists(file_path): print(f"Warning: {file} not found in {category_path}.") -print("All files are correctly organized!") \ No newline at end of file +print("All files are correctly organized!") diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py index 8e74cede..cf14afa9 100644 --- 
a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py @@ -2,7 +2,9 @@ import os # Add the src directory to Python path -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator @@ -10,8 +12,10 @@ generator = EmbeddingGenerator() # Generate embedding for the image -image_embedding = generator.generate_embedding(["data/images/crime_scene1.jpg"], "vision") +image_embedding = generator.generate_embedding( + ["data/images/crime_scene1.jpg"], "vision" +) # Print the shape print(image_embedding.shape) -# Expected Output: (1024,) \ No newline at end of file +# Expected Output: (1024,) diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py index fbfc4d78..bb2476ae 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -15,6 +18,7 @@ # Load environment variables load_dotenv() + def process_evidence(generator, es_manager, file_path, modality, description, metadata): """Helper function 
to process each piece of evidence""" try: @@ -28,20 +32,21 @@ def process_evidence(generator, es_manager, file_path, modality, description, me modality=modality, description=description, content_path=file_path, - metadata=metadata + metadata=metadata, ) - + # Convert Elasticsearch response to dict for JSON serialization response_dict = { "result": response["result"], "_id": response["_id"], - "_index": response["_index"] + "_index": response["_index"], } logger.info(f"\n\nIndexed {modality}: {json.dumps(response_dict, indent=2)}") - + except Exception as e: logger.error(f"Error processing {modality}: {str(e)}") + def main(): # Initialize components generator = EmbeddingGenerator() @@ -57,44 +62,59 @@ def main(): "file_path": "data/images/crime_scene1.jpg", "modality": "vision", "description": "Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall.", - "metadata": {"location": "Gotham Central Bank", "timestamp": "2025-01-30 23:15"} + "metadata": { + "location": "Gotham Central Bank", + "timestamp": "2025-01-30 23:15", + }, }, { "file_path": "data/images/joker_laughing.png", "modality": "vision", "description": "The Joker with green hair, white face paint, and a sinister smile in an urban night setting.", - "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"} + "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"}, }, { "file_path": "data/images/jdancing.png", "modality": "vision", "description": "Suspect dancing", - "metadata": {"location": "Gotham Central Station", "timestamp": "2025-01-30 23:18"} + "metadata": { + "location": "Gotham Central Station", + "timestamp": "2025-01-30 23:18", + }, }, { "file_path": "data/audios/joker_laugh.wav", "modality": "audio", "description": "A sinister laugh captured near the crime scene", - "metadata": {"location": "Gotham Central Bank - Main Hall", "timestamp": "2025-01-30 23:16"} 
+ "metadata": { + "location": "Gotham Central Bank - Main Hall", + "timestamp": "2025-01-30 23:16", + }, }, { "file_path": "data/texts/note2.txt", "modality": "text", "description": "Why so serious", - "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"} + "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"}, }, { "file_path": "data/texts/riddle.txt", "modality": "text", "description": "Mysterious note found at the location", - "metadata": {"location": "Gotham Central Bank - Vault", "timestamp": "2025-01-30 23:20"} + "metadata": { + "location": "Gotham Central Bank - Vault", + "timestamp": "2025-01-30 23:20", + }, }, { "file_path": "data/depths/depth_suspect.png", "modality": "depth", "description": "Depth sensor capture of the suspect", - "metadata": {"location": "Gotham Central Bank - Back Alley", "timestamp": "2025-01-30 23:18"} - } + "metadata": { + "location": "Gotham Central Bank - Back Alley", + "timestamp": "2025-01-30 23:18", + }, + }, ] # Process each piece of evidence @@ -105,8 +125,9 @@ def main(): evidence["file_path"], evidence["modality"], evidence["description"], - evidence["metadata"] + evidence["metadata"], ) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py index 453f4d1c..2963510a 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") 
+) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -23,19 +26,16 @@ audio_embedding = generator.generate_embedding(["data/audios/joker_laugh.wav"], "audio") # Search for similar evidence in Elasticsearch -similar_evidences = es_manager.search_similar( - query_embedding=audio_embedding, - k=3 -) +similar_evidences = es_manager.search_similar(query_embedding=audio_embedding, k=3) # Display the retrieved results print("\n🔎 Similar evidence found:\n") for i, evidence in enumerate(similar_evidences, start=1): - description = evidence['description'] - modality = evidence['modality'] - score = evidence['score'] - content_path = evidence.get('content_path', 'N/A') - + description = evidence["description"] + modality = evidence["modality"] + score = evidence["score"] + content_path = evidence.get("content_path", "N/A") + print(f"{i}. {description} ({modality})") print(f" Similarity: {score:.4f}") - print(f" File path: {content_path}\n") \ No newline at end of file + print(f" File path: {content_path}\n") diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py index 95f998c2..001985f8 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -20,23 +23,23 @@ es_manager = ElasticsearchManager() # Generate embedding for a suspicious 
depth map -vision_embedding = generator.generate_embedding(["data/depths/jdancing-depth.png"], "depth") +vision_embedding = generator.generate_embedding( + ["data/depths/jdancing-depth.png"], "depth" +) # Search for similar evidence in Elasticsearch similar_evidences = es_manager.search_similar( - query_embedding=vision_embedding, - modality="vision", - k=3 + query_embedding=vision_embedding, modality="vision", k=3 ) # Display the retrieved results print("\n🔎 Similar evidence found:\n") for i, evidence in enumerate(similar_evidences, start=1): - description = evidence['description'] - modality = evidence['modality'] - score = evidence['score'] - content_path = evidence.get('content_path', 'N/A') - + description = evidence["description"] + modality = evidence["modality"] + score = evidence["score"] + content_path = evidence.get("content_path", "N/A") + print(f"{i}. {description} ({modality})") print(f" Similarity: {score:.4f}") - print(f" File path: {content_path}\n") \ No newline at end of file + print(f" File path: {content_path}\n") diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py index d88649d1..b55efc94 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -20,22 +23,21 @@ es_manager = ElasticsearchManager() # Generate embedding for a suspicious image 
-vision_embedding = generator.generate_embedding(["data/images/crime_scene2.jpg"], "vision") +vision_embedding = generator.generate_embedding( + ["data/images/crime_scene2.jpg"], "vision" +) # Search for similar evidence in Elasticsearch -similar_evidences = es_manager.search_similar( - query_embedding=vision_embedding, - k=3 -) +similar_evidences = es_manager.search_similar(query_embedding=vision_embedding, k=3) # Display the retrieved results print("\n🔎 Similar evidence found:\n") for i, evidence in enumerate(similar_evidences, start=1): - description = evidence['description'] - modality = evidence['modality'] - score = evidence['score'] - content_path = evidence.get('content_path', 'N/A') - + description = evidence["description"] + modality = evidence["modality"] + score = evidence["score"] + content_path = evidence.get("content_path", "N/A") + print(f"{i}. {description} ({modality})") print(f" Similarity: {score:.4f}") - print(f" File path: {content_path}\n") \ No newline at end of file + print(f" File path: {content_path}\n") diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py index 61367ac0..d38e7767 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -24,19 +27,16 @@ embedding_text = generator.generate_embedding([text], "text") # Search for related evidence 
-similar_evidences = es_manager.search_similar( - query_embedding=embedding_text, - k=3 -) +similar_evidences = es_manager.search_similar(query_embedding=embedding_text, k=3) # Display the retrieved results print("\n🔎 Similar evidence found:\n") for i, evidence in enumerate(similar_evidences, start=1): - description = evidence['description'] - modality = evidence['modality'] - score = evidence['score'] - content_path = evidence.get('content_path', 'N/A') - + description = evidence["description"] + modality = evidence["modality"] + score = evidence["score"] + content_path = evidence.get("content_path", "N/A") + print(f"{i}. {description} ({modality})") print(f" Similarity: {score:.4f}") - print(f" File path: {content_path}\n") \ No newline at end of file + print(f" File path: {content_path}\n") diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py index 04746f7c..03ac6e4d 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py @@ -1,6 +1,9 @@ import sys import os -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src')) + +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src") +) from embedding_generator import EmbeddingGenerator from elastic_manager import ElasticsearchManager @@ -23,43 +26,43 @@ llm = LLMAnalyzer() logger.info("✅ All components initialized successfully") - + try: evidence_data = {} - + # Get data for each modality test_files = { - 'vision': 'data/images/crime_scene2.jpg', - 'audio': 'data/audios/joker_laugh.wav', - 'text': 'Why so serious?', - 'depth': 'data/depths/jdancing-depth.png' + "vision": 
"data/images/crime_scene2.jpg", + "audio": "data/audios/joker_laugh.wav", + "text": "Why so serious?", + "depth": "data/depths/jdancing-depth.png", } - + logger.info("🔍 Collecting evidence...") for modality, test_input in test_files.items(): try: - if modality == 'text': + if modality == "text": embedding = generator.generate_embedding([test_input], modality) else: embedding = generator.generate_embedding([str(test_input)], modality) - + results = es_manager.search_similar(embedding, k=2) if results: evidence_data[modality] = results logger.info(f"✅ Data retrieved for {modality}: {len(results)} results") else: logger.warning(f"⚠️ No results found for {modality}") - + except Exception as e: logger.error(f"❌ Error retrieving {modality} data: {str(e)}") - + if not evidence_data: raise ValueError("No evidence data found in Elasticsearch!") - + # Test forensic report generation logger.info("\n📝 Generating forensic report...") report = llm.analyze_evidence(evidence_data) - + if report: logger.info("✅ Forensic report generated successfully") logger.info("\n📊 Report Preview:") @@ -68,7 +71,6 @@ logger.info("+" * 50) else: raise ValueError("Failed to generate forensic report") - + except Exception as e: logger.error(f"❌ Error in analysis : {str(e)}") -