From 76475faa222738314bce4e33110bc3b03c515d08 Mon Sep 17 00:00:00 2001
From: Adrian Cole <adrian.cole@elastic.co>
Date: Wed, 26 Feb 2025 20:16:05 +0800
Subject: [PATCH] polish-and-docker

Signed-off-by: Adrian Cole <adrian.cole@elastic.co>
---
 .../.dockerignore                             |   8 +
 .../.env.template                             |  18 +-
 .../Dockerfile                                |  50 ++--
 .../README.md                                 |  67 ++---
 .../docker-compose.yml                        |  61 ++++
 .../requirements.in                           |  15 -
 .../requirements.txt                          | 274 +-----------------
 .../src/elastic_manager.py                    |  61 ++--
 .../src/embedding_generator.py                |  74 ++---
 .../src/llm_analyzer.py                       |  47 +--
 .../stages/01-stage/files_check.py            |   6 +-
 .../02-stage/test_embedding_generation.py     |  10 +-
 .../stages/03-stage/index_all_modalities.py   |  51 +++-
 .../stages/03-stage/search_by_audio.py        |  22 +-
 .../stages/03-stage/search_by_depth.py        |  25 +-
 .../stages/03-stage/search_by_image.py        |  26 +-
 .../stages/03-stage/search_by_text.py         |  22 +-
 .../stages/04-stage/rag_crime_analyze.py      |  34 ++-
 18 files changed, 359 insertions(+), 512 deletions(-)
 create mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore
 create mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml
 delete mode 100644 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in

diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore
new file mode 100644
index 00000000..c93b7331
--- /dev/null
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.dockerignore
@@ -0,0 +1,8 @@
+# Ignore everything
+**
+
+# Allow specific files and directories
+!requirements.txt
+!data/
+!src/
+!stages/
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template
index 7d9c72a1..383c870e 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/.env.template
@@ -1,13 +1,17 @@
-# Elasticsearch Configuration
-ELASTIC_API_KEY=your_api_key_here
-ELASTICSEARCH_ENDPOINT=your_elastic_endpoint
+# Make a copy of this file with the name .env and assign values to variables
+
+# How you connect to Elasticsearch: change details to your instance
+ELASTICSEARCH_URL=
+ELASTICSEARCH_API_KEY=
+# If not using API key, uncomment these and fill them in:
+# ELASTICSEARCH_USER=elastic
+# ELASTICSEARCH_PASSWORD=elastic
 
 # OpenAI Configuration
-OPENAI_API_KEY=your_openai_api_key_here
+OPENAI_API_KEY=
 
 # Model Configuration
-MODEL_PATH=~/.cache/torch/checkpoints/imagebind_huge.pth
 
 # Optional Configuration
-#LOG_LEVEL=INFO
-#DEBUG=False
\ No newline at end of file
+# LOG_LEVEL=INFO
+# DEBUG=False
\ No newline at end of file
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile
index efeb328c..2b22ab0d 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/Dockerfile
@@ -1,20 +1,36 @@
-FROM ubuntu:24.04
+# Use non-slim image due to OS dependencies of python packages. This gives us
+# git, build-essential, libglib2 (opencv) and gomp (torchaudio).
+FROM python:3.12
 
-# Install necessary packages
-RUN apt update && apt install -y --no-install-recommends \
-    python3 \
-    python3-pip \
-    python3-venv \
-    g++ \
-    gcc \
-    python3.12-dev
+COPY /requirements.txt .
 
-# Create and activate a virtual environment
-RUN python3 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
+# Our python requirements have some OS dependencies beyond the base layer:
+#
+# * imagebind pulls in cartopy which has OS dependencies on geos and proj
+# * opencv has a runtime OS dependency on libgl1-mesa-glx
+#
+# The dev dependencies are installed temporarily to compile the wheels.
+# We leave only the runtime dependencies, to keep the image smaller.
+RUN apt-get update && \
+    # install build and runtime dependencies
+    apt-get install -y --no-install-recommends \
+        libgeos-dev \
+        libproj-dev \
+        libgeos-c1v5 \
+        libproj25 \
+        libgl1-mesa-glx && \
+    # Install everything except xformers first
+    grep -v "\bxformers\b" requirements.txt > /tmp/r.txt && pip install -r /tmp/r.txt && \
+    # Now, install xformers, as it should be able to see torch now
+    grep "\bxformers\b" requirements.txt > /tmp/r.txt && pip install -r /tmp/r.txt && \
+    # remove build dependencies
+    apt-get purge -y libgeos-dev libproj-dev && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+RUN mkdir -p ./data ./src ./stages
+COPY ./data ./data
+COPY ./src ./src
+COPY ./stages ./stages
 
-# Install Python packages in the virtual environment
-RUN pip install --upgrade pip
-RUN pip install torch
-RUN pip install wheel setuptools
-RUN pip install transformers xformers
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md
index d22e0232..a5934a47 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/README.md
@@ -11,67 +11,34 @@ The pipeline demonstrates how to:
 
 ## Prerequisites
 
-- Python 3.10+
+- A Docker runtime with 8GB+ free RAM
+  - GPU is optional, but recommended
 - Elasticsearch cluster (cloud or local)
 - OpenAI API key - Setup an OpenAI account and create a [secret key](https://platform.openai.com/docs/quickstart)
-- 8GB+ RAM
-- GPU (optional but recommended)
 
 ## Quick Start
 
-1. **Setup Environment**
-```bash
-rm -rf .venv requirements.txt
-python3 -m venv .venv
-source .venv/bin/activate
-pip install pip-tools
-# Recreate requirements.txt
-pip-compile
-# Install main dependencies
-pip install -r requirements.txt
-
-
-
-python3 -m venv .venv
-source .venv/bin/activate
-pip install "python-dotenv[cli]"
-pip install -r requirements-torch.txt
-pip install -r requirements.txt
-
-# Make sure you have pytorch installed and Python 3.10+
-pip install torch torchvision torchaudio
-
-# Create and activate virtual environment
-python -m venv env_mmrag
-source env_mmrag/bin/activate  # Unix/MacOS
-# or
-.\env_mmrag\Scripts\activate   # Windows
-
-# Install dependencies
-pip install -r requirements.txt
-```
+This example runs four stages as docker compose services:
 
-2. **Configure Credentials**
-Create a `.env` file:
-```env
-ELASTICSEARCH_ENDPOINT="your-elasticsearch-endpoint"
-ELASTIC_API_KEY="your-elastic-api-key"
-OPENAI_API_KEY="your-openai-api-key"
+```mermaid
+graph TD
+    verify-file-structure --> generate-embeddings
+    generate-embeddings --> index-content
+    index-content --> search-and-analyze
 ```
 
-3. **Run the Demo**
-```bash
-# Verify file structure
-python stages/01-stage/files_check.py
+First, copy [.env.template](.env.template) to `.env` and fill in values noted inside.
 
-# Generate embeddings
-python stages/02-stage/test_embedding_generation.py
+Now, run the command below to execute the pipeline:
+```bash
+docker compose run --build --rm search-and-analyze
+```
 
-# Index content
-python stages/03-stage/index_all_modalities.py
+The first time takes a while to build the image and download ImageBind weights.
 
-# Search and analyze
-python stages/04-stage/rag_crime_analyze.py
+If you want to re-run just one stage, add `--no-deps` like this:
+```bash
+docker compose run --no-deps --build --rm search-and-analyze
 ```
 
 ## Project Structure
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml
new file mode 100644
index 00000000..024ccd7b
--- /dev/null
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/docker-compose.yml
@@ -0,0 +1,61 @@
+name: gotham-city-crime-analysis
+
+services:
+  verify-file-structure:
+    build:
+      context: .
+    container_name: verify-file-structure
+    restart: 'no'  # no need to re-verify file structure
+    env_file:
+      - .env
+    command: python stages/01-stage/files_check.py
+    extra_hosts:  # send localhost traffic to the docker host, e.g. your laptop
+        - "localhost:host-gateway"
+
+  generate-embeddings:
+    depends_on:
+      verify-file-structure:
+        condition: service_completed_successfully
+    build:
+      context: .
+    container_name: generate-embeddings
+    restart: 'no'  # no need to re-generate embeddings
+    env_file:
+      - .env
+    command: python stages/02-stage/test_embedding_generation.py
+    extra_hosts:  # send localhost traffic to the docker host, e.g. your laptop
+        - "localhost:host-gateway"
+    volumes:  # must match ~/.cache/torch/checkpoints used by src/embedding_generator.py (root user)
+      - torch-checkpoints:/root/.cache/torch/checkpoints/
+
+  index-content:
+    depends_on:
+      generate-embeddings:
+        condition: service_completed_successfully
+    build:
+      context: .
+    container_name: index-content
+    restart: 'no'  # no need to re-index content
+    env_file:
+      - .env
+    command: python stages/03-stage/index_all_modalities.py
+    extra_hosts:  # send localhost traffic to the docker host, e.g. your laptop
+        - "localhost:host-gateway"
+
+  search-and-analyze:
+    depends_on:
+      index-content:
+        condition: service_completed_successfully
+    build:
+      context: .
+    container_name: search-and-analyze
+    restart: 'no'  # no need to re-run the analysis
+    env_file:
+      - .env
+    command: python stages/04-stage/rag_crime_analyze.py
+    extra_hosts:  # send localhost traffic to the docker host, e.g. your laptop
+        - "localhost:host-gateway"
+
+volumes:
+  # Avoid re-downloading a >4GB model checkpoint
+  torch-checkpoints:
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in
deleted file mode 100644
index 74d6bb64..00000000
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.in
+++ /dev/null
@@ -1,15 +0,0 @@
-elasticsearch
-torch
-torchvision
-torchaudio
-imagebind @ git+https://github.com/hkchengrex/ImageBind.git
-openai
-python-dotenv
-numpy
-pillow
-opencv-python
-librosa
-matplotlib
-wheel
-setuptools
-xformers
\ No newline at end of file
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt
index 1b701e27..9517fa76 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/requirements.txt
@@ -1,261 +1,15 @@
-#
-# This file is autogenerated by pip-compile with Python 3.12
-# by the following command:
-#
-#    pip-compile
-#
-annotated-types==0.7.0
-    # via pydantic
-anyio==4.8.0
-    # via
-    #   httpx
-    #   openai
-audioread==3.0.1
-    # via librosa
-av==14.1.0
-    # via pytorchvideo
-cartopy==0.24.1
-    # via imagebind
-certifi==2025.1.31
-    # via
-    #   elastic-transport
-    #   httpcore
-    #   httpx
-    #   pyproj
-    #   requests
-cffi==1.17.1
-    # via soundfile
-charset-normalizer==3.4.1
-    # via requests
-contourpy==1.3.1
-    # via matplotlib
-cycler==0.12.1
-    # via matplotlib
-decorator==5.2.1
-    # via librosa
-distro==1.9.0
-    # via openai
-einops==0.8.1
-    # via imagebind
-elastic-transport==8.17.0
-    # via elasticsearch
-elasticsearch==8.17.1
-    # via -r requirements.in
-filelock==3.17.0
-    # via
-    #   huggingface-hub
-    #   torch
-fonttools==4.56.0
-    # via matplotlib
-fsspec==2025.2.0
-    # via
-    #   huggingface-hub
-    #   torch
-ftfy==6.3.1
-    # via imagebind
-fvcore==0.1.5.post20221221
-    # via
-    #   imagebind
-    #   pytorchvideo
-h11==0.14.0
-    # via httpcore
-httpcore==1.0.7
-    # via httpx
-httpx==0.28.1
-    # via openai
-huggingface-hub==0.29.1
-    # via timm
-idna==3.10
-    # via
-    #   anyio
-    #   httpx
-    #   requests
+elasticsearch~=8.17.1
+torch~=2.6.0
+torchvision~=0.21.0
+torchaudio~=2.6.0
 imagebind @ git+https://github.com/hkchengrex/ImageBind.git
-    # via -r requirements.in
-iopath==0.1.10
-    # via
-    #   fvcore
-    #   imagebind
-    #   pytorchvideo
-jinja2==3.1.5
-    # via torch
-jiter==0.8.2
-    # via openai
-joblib==1.4.2
-    # via
-    #   librosa
-    #   scikit-learn
-kiwisolver==1.4.8
-    # via matplotlib
-lazy-loader==0.4
-    # via librosa
-librosa==0.10.2.post1
-    # via -r requirements.in
-llvmlite==0.44.0
-    # via numba
-markupsafe==3.0.2
-    # via jinja2
-matplotlib==3.10.0
-    # via
-    #   -r requirements.in
-    #   cartopy
-    #   imagebind
-mpmath==1.3.0
-    # via sympy
-msgpack==1.1.0
-    # via librosa
-networkx==3.4.2
-    # via
-    #   pytorchvideo
-    #   torch
-numba==0.61.0
-    # via librosa
-numpy==2.1.3
-    # via
-    #   -r requirements.in
-    #   cartopy
-    #   contourpy
-    #   fvcore
-    #   imagebind
-    #   librosa
-    #   matplotlib
-    #   numba
-    #   opencv-python
-    #   scikit-learn
-    #   scipy
-    #   shapely
-    #   soundfile
-    #   soxr
-    #   torchvision
-openai==1.64.0
-    # via -r requirements.in
-opencv-python==4.11.0.86
-    # via -r requirements.in
-packaging==24.2
-    # via
-    #   cartopy
-    #   huggingface-hub
-    #   lazy-loader
-    #   matplotlib
-    #   pooch
-parameterized==0.9.0
-    # via pytorchvideo
-pillow==11.1.0
-    # via
-    #   -r requirements.in
-    #   fvcore
-    #   matplotlib
-    #   torchvision
-platformdirs==4.3.6
-    # via pooch
-pooch==1.8.2
-    # via librosa
-portalocker==3.1.1
-    # via iopath
-pycparser==2.22
-    # via cffi
-pydantic==2.10.6
-    # via openai
-pydantic-core==2.27.2
-    # via pydantic
-pyparsing==3.2.1
-    # via matplotlib
-pyproj==3.7.1
-    # via cartopy
-pyshp==2.3.1
-    # via cartopy
-python-dateutil==2.9.0.post0
-    # via matplotlib
-python-dotenv==1.0.1
-    # via -r requirements.in
-pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc
-    # via imagebind
-pyyaml==6.0.2
-    # via
-    #   fvcore
-    #   huggingface-hub
-    #   timm
-    #   yacs
-regex==2024.11.6
-    # via imagebind
-requests==2.32.3
-    # via
-    #   huggingface-hub
-    #   pooch
-safetensors==0.5.2
-    # via timm
-scikit-learn==1.6.1
-    # via librosa
-scipy==1.15.2
-    # via
-    #   librosa
-    #   scikit-learn
-shapely==2.0.7
-    # via cartopy
-six==1.17.0
-    # via python-dateutil
-sniffio==1.3.1
-    # via
-    #   anyio
-    #   openai
-soundfile==0.13.1
-    # via librosa
-soxr==0.5.0.post1
-    # via librosa
-sympy==1.13.1
-    # via torch
-tabulate==0.9.0
-    # via fvcore
-termcolor==2.5.0
-    # via fvcore
-threadpoolctl==3.5.0
-    # via scikit-learn
-timm==1.0.15
-    # via imagebind
-torch==2.6.0
-    # via
-    #   -r requirements.in
-    #   imagebind
-    #   timm
-    #   torchaudio
-    #   torchvision
-torchaudio==2.6.0
-    # via
-    #   -r requirements.in
-    #   imagebind
-torchvision==0.21.0
-    # via
-    #   -r requirements.in
-    #   imagebind
-    #   timm
-tqdm==4.67.1
-    # via
-    #   fvcore
-    #   huggingface-hub
-    #   iopath
-    #   openai
-types-regex==2024.11.6.20241221
-    # via imagebind
-typing-extensions==4.12.2
-    # via
-    #   anyio
-    #   huggingface-hub
-    #   iopath
-    #   librosa
-    #   openai
-    #   pydantic
-    #   pydantic-core
-    #   torch
-urllib3==2.3.0
-    # via
-    #   elastic-transport
-    #   requests
-wcwidth==0.2.13
-    # via ftfy
-wheel==0.45.1
-    # via -r requirements.in
-yacs==0.1.8
-    # via fvcore
-
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools
+openai~=1.64.0
+python-dotenv~=1.0.1
+numpy~=2.1.3
+pillow~=11.1.0
+opencv-python~=4.11.0
+librosa~=0.10.2
+matplotlib~=3.10.0
+wheel~=0.45.1
+setuptools
+xformers~=0.0.29
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py
index f7fbcd6c..df5597e7 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/elastic_manager.py
@@ -4,15 +4,16 @@
 from dotenv import load_dotenv
 import numpy as np
 
+
 class ElasticsearchManager:
     """Manages multimodal operations in Elasticsearch"""
-    
+
     def __init__(self):
         load_dotenv()  # Load variables from .env
         self.es = self._connect_elastic()
         self.index_name = "multimodal_content"
         self._setup_index()
-    
+
     def _connect_elastic(self):
         """Connects to Elasticsearch"""
         ELASTICSEARCH_URL = os.getenv("ELASTICSEARCH_URL")
@@ -33,7 +34,7 @@ def _connect_elastic(self):
             raise ValueError(
                 "Please provide either ELASTICSEARCH_USER or ELASTICSEARCH_API_KEY"
             )
-    
+
     def _setup_index(self):
         """Sets up the index if it doesn't exist"""
         if not self.es.indices.exists(index=self.index_name):
@@ -44,33 +45,45 @@ def _setup_index(self):
                             "type": "dense_vector",
                             "dims": 1024,
                             "index": True,
-                            "similarity": "cosine"
+                            "similarity": "cosine",
                         },
                         "modality": {"type": "keyword"},
                         "content": {"type": "binary"},
                         "description": {"type": "text"},
                         "metadata": {"type": "object"},
-                        "content_path": {"type": "text"}
+                        "content_path": {"type": "text"},
                     }
                 }
             }
             self.es.indices.create(index=self.index_name, body=mapping)
-    
-    def index_content(self, embedding, modality, content=None, description="", metadata=None, content_path=None):
+
+    def index_content(
+        self,
+        embedding,
+        modality,
+        content=None,
+        description="",
+        metadata=None,
+        content_path=None,
+    ):
         """Indexes multimodal content"""
         doc = {
             "embedding": embedding.tolist(),
             "modality": modality,
             "description": description,
             "metadata": metadata or {},
-            "content_path": content_path
+            "content_path": content_path,
         }
-        
+
         if content:
-            doc["content"] = base64.b64encode(content).decode() if isinstance(content, bytes) else content
-        
+            doc["content"] = (
+                base64.b64encode(content).decode()
+                if isinstance(content, bytes)
+                else content
+            )
+
         return self.es.index(index=self.index_name, document=doc)
-    
+
     def search_similar(self, query_embedding, modality=None, k=5):
         """Searches for similar contents"""
         query = {
@@ -79,23 +92,19 @@ def search_similar(self, query_embedding, modality=None, k=5):
                 "query_vector": query_embedding.tolist(),
                 "k": k,
                 "num_candidates": 100,
-                "filter": [{"term": {"modality": modality}}] if modality else []
+                "filter": [{"term": {"modality": modality}}] if modality else [],
             }
         }
-        
+
         try:
-            response = self.es.search(
-                index=self.index_name,
-                query=query,
-                size=k            
-            )
-            
+            response = self.es.search(index=self.index_name, query=query, size=k)
+
             # Return both source data and score for each hit
-            return [{
-                **hit["_source"],
-                "score": hit["_score"]
-            } for hit in response["hits"]["hits"]]
-        
+            return [
+                {**hit["_source"], "score": hit["_score"]}
+                for hit in response["hits"]["hits"]
+            ]
+
         except Exception as e:
             print(f"Error: processing search_evidence: {str(e)}")
-            return "Error generating search evidence"
\ No newline at end of file
+            return "Error generating search evidence"
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py
index 8e3e0d3f..9d1d337d 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/embedding_generator.py
@@ -16,87 +16,93 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
 class EmbeddingGenerator:
     """Generates multimodal embeddings using ImageBind"""
-    
+
     def __init__(self, device="cpu"):
         self.device = device
         self.model = self._load_model()
-        
+
     def _load_model(self):
         """Initialize and test the ImageBind model."""
-        checkpoint_path = "~/.cache/torch/checkpoints/imagebind_huge.pth"
-        os.makedirs(os.path.expanduser("~/.cache/torch/checkpoints"), exist_ok=True)
 
-        if not os.path.exists(os.path.expanduser(checkpoint_path)):
+        checkpoint_path = os.path.expanduser(
+            "~/.cache/torch/checkpoints/imagebind_huge.pth"
+        )
+        logger.info(f"Using cache {checkpoint_path}")
+        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
+
+        if not os.path.exists(checkpoint_path):
             print("Downloading ImageBind weights...")
             download_url_to_file(
                 "https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth",
-                os.path.expanduser(checkpoint_path)
+                checkpoint_path,
             )
-            
+
         try:
-            checkpoint_path = os.path.expanduser("~/.cache/torch/checkpoints/imagebind_huge.pth")
-        
             # Check if file exists
             if not os.path.exists(checkpoint_path):
                 raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
-                
+
             model = imagebind_model.imagebind_huge(pretrained=False)
             model.load_state_dict(torch.load(checkpoint_path))
             model.eval().to(self.device)
-            
+
             # Quick test with empty text input
             logger.info("Testing model with sample input...")
             test_input = data.load_and_transform_text([""], self.device)
             with torch.no_grad():
                 _ = model({"text": test_input})
-            
+
             logger.info("🤖 ImageBind model initialized successfully")
             return model
         except Exception as e:
             logger.error(f"🚨 Model initialization failed: {str(e)}")
             raise
-    
+
     def generate_embedding(self, input_data, modality):
         """Generates embedding for different modalities"""
         processors = {
             "vision": lambda x: data.load_and_transform_vision_data(x, self.device),
             "audio": lambda x: data.load_and_transform_audio_data(x, self.device),
             "text": lambda x: data.load_and_transform_text(x, self.device),
-            "depth": self.process_depth
+            "depth": self.process_depth,
         }
-        
+
         try:
             # Input type verification
             if not isinstance(input_data, list):
-                raise ValueError(f"Input data must be a list. Received: {type(input_data)}")
-                
+                raise ValueError(
+                    f"Input data must be a list. Received: {type(input_data)}"
+                )
+
             # Convert input data to a tensor format that the model can process
-            # For images: [batch_size, channels, height, width] 
-            # For audio: [batch_size, channels, time] 
+            # For images: [batch_size, channels, height, width]
+            # For audio: [batch_size, channels, time]
             # For text: [batch_size, sequence_length]
             inputs = {modality: processors[modality](input_data)}
             with torch.no_grad():
                 embedding = self.model(inputs)[modality]
             return embedding.squeeze(0).cpu().numpy()
         except Exception as e:
-            logger.error(f"Error generating {modality} embedding: {str(e)}", exc_info=True)
+            logger.error(
+                f"Error generating {modality} embedding: {str(e)}", exc_info=True
+            )
             raise
-    
 
     def process_vision(self, image_path):
         """Processes image"""
         return data.load_and_transform_vision_data([image_path], self.device)
-    
+
     def process_audio(self, audio_path):
         """Processes audio"""
         return data.load_and_transform_audio_data([audio_path], self.device)
-    
+
     def process_text(self, text):
         """Processes text"""
         return data.load_and_transform_text([text], self.device)
-    
+
     def process_depth(self, depth_paths, device="cpu"):
         """Custom processing for depth maps"""
         try:
@@ -104,17 +110,19 @@ def process_depth(self, depth_paths, device="cpu"):
             for path in depth_paths:
                 if not os.path.exists(path):
                     raise FileNotFoundError(f"Depth map file not found: {path}")
-            
+
             # Load and transform
             depth_images = [Image.open(path).convert("L") for path in depth_paths]
-            
-            transform = transforms.Compose([
-                transforms.Resize((224, 224)),
-                transforms.ToTensor(),
-            ])
-            
+
+            transform = transforms.Compose(
+                [
+                    transforms.Resize((224, 224)),
+                    transforms.ToTensor(),
+                ]
+            )
+
             return torch.stack([transform(img) for img in depth_images]).to(device)
-            
+
         except Exception as e:
             logger.error(f"🚨 - Error processing depth map: {str(e)}")
-            raise
\ No newline at end of file
+            raise
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py
index 1d2a11c5..13cfc174 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/src/llm_analyzer.py
@@ -6,17 +6,18 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
 class LLMAnalyzer:
     """Evidence analyzer using GPT-4"""
-    
+
     def __init__(self):
         load_dotenv()
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    
+
     def analyze_evidence(self, evidence_results):
         """
         Analyzes multimodal search results and generates a report
-        
+
         Args:
             evidence_results: Dict with results by modality
             {
@@ -63,40 +64,42 @@ def analyze_evidence(self, evidence_results):
                 messages=[
                     {
                         "role": "system",
-                        "content": "You are a forensic detective specialized in multimodal evidence analysis."
+                        "content": "You are a forensic detective specialized in multimodal evidence analysis.",
                     },
-                    {"role": "user", "content": prompt}
+                    {"role": "user", "content": prompt},
                 ],
                 temperature=0.2,
-                max_tokens=1000
+                max_tokens=1000,
             )
-            
+
             report = response.choices[0].message.content
             logger.info("\n📋 Forensic Report Generated:")
             logger.info("=" * 50)
             logger.info(report)
             logger.info("=" * 50)
-            
+
             return report
-            
+
         except Exception as e:
             logger.error(f"Error generating report: {str(e)}")
             return None
-    
+
     def _format_evidence(self, evidence_results):
         """Formats evidence for the prompt"""
         formatted = []
-        
+
         for modality, results in evidence_results.items():
             formatted.append(f"\n{modality.upper()}:")
             for i, result in enumerate(results, 1):
-                description = result.get('description', 'No description')
-                similarity = result.get('score', 0)
+                description = result.get("description", "No description")
+                similarity = result.get("score", 0)
                 formatted.append(f"{i}. {description} (Similarity: {similarity:.2f})")
-        
+
         return "\n".join(formatted)
 
-    def analyze_cross_modal_connections(self, results_a, modality_a, results_b, modality_b):
+    def analyze_cross_modal_connections(
+        self, results_a, modality_a, results_b, modality_b
+    ):
         """Analyzes specific connections between two different modalities"""
         prompt = f"""Analyze the relationship between the following evidence from different modalities:
 
@@ -120,20 +123,20 @@ def analyze_cross_modal_connections(self, results_a, modality_a, results_b, moda
                 messages=[
                     {
                         "role": "system",
-                        "content": "You are an expert in forensic analysis of multimodal evidence."
+                        "content": "You are an expert in forensic analysis of multimodal evidence.",
                     },
-                    {"role": "user", "content": prompt}
+                    {"role": "user", "content": prompt},
                 ],
                 temperature=0.7,
-                max_tokens=500
+                max_tokens=500,
             )
-            
+
             analysis = response.choices[0].message.content
             logger.info(f"\n🔍 Cross-Modal Analysis ({modality_a} x {modality_b}):")
             logger.info(analysis)
-            
+
             return analysis
-            
+
         except Exception as e:
             logger.error(f"Error: in cross-modal analysis: {str(e)}")
-            return None
\ No newline at end of file
+            return None
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py
index 875727e8..6c8dfe48 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/01-stage/files_check.py
@@ -5,10 +5,10 @@
 
 # List of expected files
 evidences = {
-    "images": ["crime_scene1.jpg","crime_scene1.jpg", "joker_alley.jpg"],
+    "images": ["crime_scene1.jpg", "crime_scene1.jpg", "joker_alley.jpg"],
     "audios": ["joker_laugh.wav"],
     "texts": ["riddle.txt", "note2.txt"],
-    "depths": ["depth_suspect.png"]
+    "depths": ["depth_suspect.png"],
 }
 
 # Create directories if they don't exist
@@ -21,4 +21,4 @@
         if not os.path.exists(file_path):
             print(f"Warning: {file} not found in {category_path}.")
 
-print("All files are correctly organized!")
\ No newline at end of file
+print("All files are correctly organized!")
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py
index 8e74cede..cf14afa9 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/02-stage/test_embedding_generation.py
@@ -2,7 +2,9 @@
 import os
 
 # Add the src directory to Python path
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 
@@ -10,8 +12,10 @@
 generator = EmbeddingGenerator()
 
 # Generate embedding for the image
-image_embedding = generator.generate_embedding(["data/images/crime_scene1.jpg"], "vision")
+image_embedding = generator.generate_embedding(
+    ["data/images/crime_scene1.jpg"], "vision"
+)
 
 # Print the shape
 print(image_embedding.shape)
-# Expected Output: (1024,)
\ No newline at end of file
+# Expected Output: (1024,)
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py
index fbfc4d78..bb2476ae 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/index_all_modalities.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -15,6 +18,7 @@
 # Load environment variables
 load_dotenv()
 
+
 def process_evidence(generator, es_manager, file_path, modality, description, metadata):
     """Helper function to process each piece of evidence"""
     try:
@@ -28,20 +32,21 @@ def process_evidence(generator, es_manager, file_path, modality, description, me
             modality=modality,
             description=description,
             content_path=file_path,
-            metadata=metadata
+            metadata=metadata,
         )
-        
+
         # Convert Elasticsearch response to dict for JSON serialization
         response_dict = {
             "result": response["result"],
             "_id": response["_id"],
-            "_index": response["_index"]
+            "_index": response["_index"],
         }
         logger.info(f"\n\nIndexed {modality}: {json.dumps(response_dict, indent=2)}")
-        
+
     except Exception as e:
         logger.error(f"Error processing {modality}: {str(e)}")
 
+
 def main():
     # Initialize components
     generator = EmbeddingGenerator()
@@ -57,44 +62,59 @@ def main():
             "file_path": "data/images/crime_scene1.jpg",
             "modality": "vision",
             "description": "Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall.",
-            "metadata": {"location": "Gotham Central Bank", "timestamp": "2025-01-30 23:15"}
+            "metadata": {
+                "location": "Gotham Central Bank",
+                "timestamp": "2025-01-30 23:15",
+            },
         },
         {
             "file_path": "data/images/joker_laughing.png",
             "modality": "vision",
             "description": "The Joker with green hair, white face paint, and a sinister smile in an urban night setting.",
-            "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"}
+            "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"},
         },
         {
             "file_path": "data/images/jdancing.png",
             "modality": "vision",
             "description": "Suspect dancing",
-            "metadata": {"location": "Gotham Central Station", "timestamp": "2025-01-30 23:18"}
+            "metadata": {
+                "location": "Gotham Central Station",
+                "timestamp": "2025-01-30 23:18",
+            },
         },
         {
             "file_path": "data/audios/joker_laugh.wav",
             "modality": "audio",
             "description": "A sinister laugh captured near the crime scene",
-            "metadata": {"location": "Gotham Central Bank - Main Hall", "timestamp": "2025-01-30 23:16"}
+            "metadata": {
+                "location": "Gotham Central Bank - Main Hall",
+                "timestamp": "2025-01-30 23:16",
+            },
         },
         {
             "file_path": "data/texts/note2.txt",
             "modality": "text",
             "description": "Why so serious",
-            "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"}
+            "metadata": {"location": "Gotham Theatre", "timestamp": "2025-01-30 23:25"},
         },
         {
             "file_path": "data/texts/riddle.txt",
             "modality": "text",
             "description": "Mysterious note found at the location",
-            "metadata": {"location": "Gotham Central Bank - Vault", "timestamp": "2025-01-30 23:20"}
+            "metadata": {
+                "location": "Gotham Central Bank - Vault",
+                "timestamp": "2025-01-30 23:20",
+            },
         },
         {
             "file_path": "data/depths/depth_suspect.png",
             "modality": "depth",
             "description": "Depth sensor capture of the suspect",
-            "metadata": {"location": "Gotham Central Bank - Back Alley", "timestamp": "2025-01-30 23:18"}
-        }
+            "metadata": {
+                "location": "Gotham Central Bank - Back Alley",
+                "timestamp": "2025-01-30 23:18",
+            },
+        },
     ]
 
     # Process each piece of evidence
@@ -105,8 +125,9 @@ def main():
             evidence["file_path"],
             evidence["modality"],
             evidence["description"],
-            evidence["metadata"]
+            evidence["metadata"],
         )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py
index 453f4d1c..2963510a 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_audio.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -23,19 +26,16 @@
 audio_embedding = generator.generate_embedding(["data/audios/joker_laugh.wav"], "audio")
 
 # Search for similar evidence in Elasticsearch
-similar_evidences = es_manager.search_similar(
-    query_embedding=audio_embedding,
-    k=3
-)
+similar_evidences = es_manager.search_similar(query_embedding=audio_embedding, k=3)
 
 # Display the retrieved results
 print("\n🔎 Similar evidence found:\n")
 for i, evidence in enumerate(similar_evidences, start=1):
-    description = evidence['description']
-    modality = evidence['modality']
-    score = evidence['score']
-    content_path = evidence.get('content_path', 'N/A')
-    
+    description = evidence["description"]
+    modality = evidence["modality"]
+    score = evidence["score"]
+    content_path = evidence.get("content_path", "N/A")
+
     print(f"{i}. {description} ({modality})")
     print(f"   Similarity: {score:.4f}")
-    print(f"   File path: {content_path}\n")
\ No newline at end of file
+    print(f"   File path: {content_path}\n")
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py
index 95f998c2..001985f8 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_depth.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -20,23 +23,23 @@
 es_manager = ElasticsearchManager()
 
 # Generate embedding for a suspicious depth map
-vision_embedding = generator.generate_embedding(["data/depths/jdancing-depth.png"], "depth")
+vision_embedding = generator.generate_embedding(
+    ["data/depths/jdancing-depth.png"], "depth"
+)
 
 # Search for similar evidence in Elasticsearch
 similar_evidences = es_manager.search_similar(
-    query_embedding=vision_embedding,
-    modality="vision",
-    k=3
+    query_embedding=vision_embedding, modality="vision", k=3
 )
 
 # Display the retrieved results
 print("\n🔎 Similar evidence found:\n")
 for i, evidence in enumerate(similar_evidences, start=1):
-    description = evidence['description']
-    modality = evidence['modality']
-    score = evidence['score']
-    content_path = evidence.get('content_path', 'N/A')
-    
+    description = evidence["description"]
+    modality = evidence["modality"]
+    score = evidence["score"]
+    content_path = evidence.get("content_path", "N/A")
+
     print(f"{i}. {description} ({modality})")
     print(f"   Similarity: {score:.4f}")
-    print(f"   File path: {content_path}\n")
\ No newline at end of file
+    print(f"   File path: {content_path}\n")
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py
index d88649d1..b55efc94 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_image.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -20,22 +23,21 @@
 es_manager = ElasticsearchManager()
 
 # Generate embedding for a suspicious image
-vision_embedding = generator.generate_embedding(["data/images/crime_scene2.jpg"], "vision")
+vision_embedding = generator.generate_embedding(
+    ["data/images/crime_scene2.jpg"], "vision"
+)
 
 # Search for similar evidence in Elasticsearch
-similar_evidences = es_manager.search_similar(
-    query_embedding=vision_embedding,
-    k=3
-)
+similar_evidences = es_manager.search_similar(query_embedding=vision_embedding, k=3)
 
 # Display the retrieved results
 print("\n🔎 Similar evidence found:\n")
 for i, evidence in enumerate(similar_evidences, start=1):
-    description = evidence['description']
-    modality = evidence['modality']
-    score = evidence['score']
-    content_path = evidence.get('content_path', 'N/A')
-    
+    description = evidence["description"]
+    modality = evidence["modality"]
+    score = evidence["score"]
+    content_path = evidence.get("content_path", "N/A")
+
     print(f"{i}. {description} ({modality})")
     print(f"   Similarity: {score:.4f}")
-    print(f"   File path: {content_path}\n")
\ No newline at end of file
+    print(f"   File path: {content_path}\n")
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py
index 61367ac0..d38e7767 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/03-stage/search_by_text.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -24,19 +27,16 @@
 embedding_text = generator.generate_embedding([text], "text")
 
 # Search for related evidence
-similar_evidences = es_manager.search_similar(
-    query_embedding=embedding_text,
-    k=3
-)
+similar_evidences = es_manager.search_similar(query_embedding=embedding_text, k=3)
 
 # Display the retrieved results
 print("\n🔎 Similar evidence found:\n")
 for i, evidence in enumerate(similar_evidences, start=1):
-    description = evidence['description']
-    modality = evidence['modality']
-    score = evidence['score']
-    content_path = evidence.get('content_path', 'N/A')
-    
+    description = evidence["description"]
+    modality = evidence["modality"]
+    score = evidence["score"]
+    content_path = evidence.get("content_path", "N/A")
+
     print(f"{i}. {description} ({modality})")
     print(f"   Similarity: {score:.4f}")
-    print(f"   File path: {content_path}\n")
\ No newline at end of file
+    print(f"   File path: {content_path}\n")
diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py
index 04746f7c..03ac6e4d 100644
--- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py
+++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/stages/04-stage/rag_crime_analyze.py
@@ -1,6 +1,9 @@
 import sys
 import os
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src'))
+
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src")
+)
 
 from embedding_generator import EmbeddingGenerator
 from elastic_manager import ElasticsearchManager
@@ -23,43 +26,43 @@
 
 llm = LLMAnalyzer()
 logger.info("✅ All components initialized successfully")
-    
+
 try:
     evidence_data = {}
-    
+
     # Get data for each modality
     test_files = {
-        'vision': 'data/images/crime_scene2.jpg',
-        'audio': 'data/audios/joker_laugh.wav',
-        'text': 'Why so serious?',
-        'depth': 'data/depths/jdancing-depth.png'
+        "vision": "data/images/crime_scene2.jpg",
+        "audio": "data/audios/joker_laugh.wav",
+        "text": "Why so serious?",
+        "depth": "data/depths/jdancing-depth.png",
     }
-    
+
     logger.info("🔍 Collecting evidence...")
     for modality, test_input in test_files.items():
         try:
-            if modality == 'text':
+            if modality == "text":
                 embedding = generator.generate_embedding([test_input], modality)
             else:
                 embedding = generator.generate_embedding([str(test_input)], modality)
-            
+
             results = es_manager.search_similar(embedding, k=2)
             if results:
                 evidence_data[modality] = results
                 logger.info(f"✅ Data retrieved for {modality}: {len(results)} results")
             else:
                 logger.warning(f"⚠️ No results found for {modality}")
-                
+
         except Exception as e:
             logger.error(f"❌ Error retrieving {modality} data: {str(e)}")
-    
+
     if not evidence_data:
         raise ValueError("No evidence data found in Elasticsearch!")
-    
+
     # Test forensic report generation
     logger.info("\n📝 Generating forensic report...")
     report = llm.analyze_evidence(evidence_data)
-    
+
     if report:
         logger.info("✅ Forensic report generated successfully")
         logger.info("\n📊 Report Preview:")
@@ -68,7 +71,6 @@
         logger.info("+" * 50)
     else:
         raise ValueError("Failed to generate forensic report")
-        
+
 except Exception as e:
     logger.error(f"❌ Error in analysis : {str(e)}")
-