From d34ab33b903054abfec48de803557a26f0ec6e38 Mon Sep 17 00:00:00 2001 From: alex salgado Date: Thu, 27 Feb 2025 18:38:16 -0300 Subject: [PATCH] remove wrong folder --- .../notebook/01-mmrag-blog-quick-start.ipynb | 1294 ++++++----------- .../notebook/elasticsearch-labs | 1 - 2 files changed, 428 insertions(+), 867 deletions(-) delete mode 160000 supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/elasticsearch-labs diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/01-mmrag-blog-quick-start.ipynb b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/01-mmrag-blog-quick-start.ipynb index 759cdb94..108412d1 100644 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/01-mmrag-blog-quick-start.ipynb +++ b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/01-mmrag-blog-quick-start.ipynb @@ -1,870 +1,432 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "dGVterhZUeb7" - }, - "source": [ - "\n", - "\n", - "# Multimodal RAG with Elasticsearch: The Gotham City Case\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JGuNiw7hUc6M" - }, - "source": [ - "This notebook implements the Multimodal RAG (Retrieval-Augmented Generation) pipeline with Elasticsearch as described in the blog. We follow the same structure as the article, with each section explained and implemented in code.\n", - "\n", - "## Environment Setup\n", - "\n", - "First, we need to clone the repository that contains the complete project code." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "UM5x0n2iA7o2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'elasticsearch-labs'...\n", - "remote: Enumerating objects: 4343, done.\u001b[K\n", - "remote: Counting objects: 100% (688/688), done.\u001b[K\n", - "remote: Compressing objects: 100% (239/239), done.\u001b[K\n", - "remote: Total 4343 (delta 546), reused 458 (delta 449), pack-reused 3655 (from 1)\u001b[K\n", - "Receiving objects: 100% (4343/4343), 98.51 MiB | 40.58 MiB/s, done.\n", - "Resolving deltas: 100% (2431/2431), done.\n" - ] - } - ], - "source": [ - "# Clone do repositório específico com a branch feature/multimodal-rag-gotham\n", - "!git clone -b feature/multimodal-rag-gotham https://github.com/salgado/elasticsearch-labs.git" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e6mW8JNyVdzi" - }, - "source": [ - "Let's navigate to the project directory where the necessary files are located:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "PHrDQc0jOOb7" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jessgarson/elasticsearch-labs/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/elasticsearch-labs/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.\n", - " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" - ] - } - ], - "source": [ - 
"cd elasticsearch-labs/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LAGB159_Uaxb" - }, - "source": [ - "Now let's configure the environment variables needed to connect to Elasticsearch and OpenAI. This is necessary for indexing and searching content, as well as generating the final report.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "U8IuJRQhS7lz" - }, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Enter the Elasticsearch endpoint url: https://getting-started.es.us-east4.gcp.elastic-cloud.com\n", - "Enter the Elasticsearch API key: ········\n", - "Enter the OpenAI API key: ········\n" - ] - } - ], - "source": [ - "ELASTICSEARCH_URL = input(\"Enter the Elasticsearch endpoint url: \")\n", - "ELASTICSEARCH_API_KEY = getpass.getpass(\"Enter the Elasticsearch API key: \")\n", - "OPENAI_API_KEY = getpass.getpass(\"Enter the OpenAI API key: \")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "ZC4v_SHjMwLa" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"ELASTICSEARCH_API_KEY\"] = ELASTICSEARCH_API_KEY\n", - "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n", - "os.environ[\"ELASTICSEARCH_URL\"] = ELASTICSEARCH_URL" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RNRExs7aVl45" - }, - "source": [ - "\n", - "## Installing Dependencies\n", - "\n", - "As mentioned in the blog, we need to install the specific dependencies, including the custom ImageBind fork:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "FhPcJYl03eNL" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "zsh:1: 2.1.0 not found\n", - "Requirement already satisfied: opencv-python-headless in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (4.11.0.86)\n", - "Requirement already satisfied: pillow in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (11.1.0)\n", - "Requirement already satisfied: numpy in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (2.2.3)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Collecting git+https://github.com/hkchengrex/ImageBind.git\n", - " Cloning https://github.com/hkchengrex/ImageBind.git to /private/var/folders/z9/dz5wy_nd4_v1_gc8dg_5krqr0000gn/T/pip-req-build-4_8958wu\n", - " Running command git clone --filter=blob:none --quiet https://github.com/hkchengrex/ImageBind.git /private/var/folders/z9/dz5wy_nd4_v1_gc8dg_5krqr0000gn/T/pip-req-build-4_8958wu\n", - " Resolved https://github.com/hkchengrex/ImageBind.git to commit 9989650c87d393d7e8c144194182cbf124cd03a0\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", - "\u001b[?25hCollecting pytorchvideo@ git+https://github.com/facebookresearch/pytorchvideo.git@ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc\n", - " Cloning https://github.com/facebookresearch/pytorchvideo.git (to revision ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc) to /private/var/folders/z9/dz5wy_nd4_v1_gc8dg_5krqr0000gn/T/pip-install-61j1i73l/pytorchvideo_0d7e0ec9e7e042daadbe7e3b3e37996e\n", - " Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/pytorchvideo.git /private/var/folders/z9/dz5wy_nd4_v1_gc8dg_5krqr0000gn/T/pip-install-61j1i73l/pytorchvideo_0d7e0ec9e7e042daadbe7e3b3e37996e\n", - " Running command git rev-parse -q --verify 'sha^ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc'\n", - " Running command git fetch -q https://github.com/facebookresearch/pytorchvideo.git ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc\n", - " Running command git checkout -q ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc\n", - " Resolved https://github.com/facebookresearch/pytorchvideo.git to commit ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc\n", - " Preparing metadata (setup.py) ... \u001b[?25ldone\n", - "\u001b[?25hRequirement already satisfied: torch>=1.13.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (2.6.0)\n", - "Requirement already satisfied: iopath in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (0.1.10)\n", - "Requirement already satisfied: einops in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (0.8.1)\n", - "Requirement already satisfied: types-regex in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (2024.11.6.20241221)\n", - "Requirement already satisfied: ftfy in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (6.3.1)\n", - "Requirement already satisfied: torchvision in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (0.21.0)\n", - "Requirement already satisfied: timm>=1.0.12 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (1.0.15)\n", - "Requirement already satisfied: cartopy in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (0.24.1)\n", - "Requirement already satisfied: fvcore in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (0.1.5.post20221221)\n", - "Requirement already satisfied: torchaudio in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (2.6.0)\n", - "Requirement already satisfied: numpy>=1.19 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (2.2.3)\n", - "Requirement already satisfied: matplotlib in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (3.10.0)\n", - "Requirement already satisfied: regex in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from imagebind==1.0.0) (2024.11.6)\n", - "Requirement already satisfied: huggingface_hub in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from timm>=1.0.12->imagebind==1.0.0) (0.29.1)\n", - "Requirement already satisfied: safetensors in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from timm>=1.0.12->imagebind==1.0.0) (0.5.3)\n", - "Requirement already 
satisfied: pyyaml in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from timm>=1.0.12->imagebind==1.0.0) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (3.1.5)\n", - "Requirement already satisfied: networkx in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (3.4.2)\n", - "Requirement already satisfied: fsspec in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (2025.2.0)\n", - "Requirement already satisfied: typing-extensions>=4.10.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (4.12.2)\n", - "Requirement already satisfied: filelock in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (3.17.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from torch>=1.13.1->imagebind==1.0.0) (1.13.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from sympy==1.13.1->torch>=1.13.1->imagebind==1.0.0) (1.3.0)\n", - "Requirement already satisfied: pyshp>=2.3 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from cartopy->imagebind==1.0.0) (2.3.1)\n", - "Requirement already satisfied: pyproj>=3.3.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from cartopy->imagebind==1.0.0) (3.7.1)\n", - "Requirement already satisfied: shapely>=1.8 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from cartopy->imagebind==1.0.0) (2.0.7)\n", - "Requirement already satisfied: packaging>=21 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from cartopy->imagebind==1.0.0) (24.2)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (0.12.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (2.9.0.post0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (4.56.0)\n", - "Requirement already satisfied: pillow>=8 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (11.1.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (1.4.8)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (1.3.1)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from matplotlib->imagebind==1.0.0) (3.2.1)\n", - "Requirement already satisfied: wcwidth in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from ftfy->imagebind==1.0.0) (0.2.13)\n", - "Requirement already satisfied: yacs>=0.1.6 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from 
fvcore->imagebind==1.0.0) (0.1.8)\n", - "Requirement already satisfied: tqdm in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from fvcore->imagebind==1.0.0) (4.67.1)\n", - "Requirement already satisfied: termcolor>=1.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from fvcore->imagebind==1.0.0) (2.5.0)\n", - "Requirement already satisfied: tabulate in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from fvcore->imagebind==1.0.0) (0.9.0)\n", - "Requirement already satisfied: portalocker in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from iopath->imagebind==1.0.0) (3.1.1)\n", - "Requirement already satisfied: av in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from pytorchvideo@ git+https://github.com/facebookresearch/pytorchvideo.git@ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc->imagebind==1.0.0) (14.2.0)\n", - "Requirement already satisfied: parameterized in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from pytorchvideo@ git+https://github.com/facebookresearch/pytorchvideo.git@ae9cfc6e62ca49eb9721a7a56e1e13e348ad21dc->imagebind==1.0.0) (0.9.0)\n", - "Requirement already satisfied: certifi in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from pyproj>=3.3.1->cartopy->imagebind==1.0.0) (2025.1.31)\n", - "Requirement already satisfied: six>=1.5 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->imagebind==1.0.0) (1.17.0)\n", - "Requirement already satisfied: requests in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from huggingface_hub->timm>=1.0.12->imagebind==1.0.0) (2.32.3)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from jinja2->torch>=1.13.1->imagebind==1.0.0) (3.0.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from requests->huggingface_hub->timm>=1.0.12->imagebind==1.0.0) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from requests->huggingface_hub->timm>=1.0.12->imagebind==1.0.0) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from requests->huggingface_hub->timm>=1.0.12->imagebind==1.0.0) (2.3.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "# Install base dependencies\n", - "!pip install torch>=2.1.0 torchvision>=0.16.0 torchaudio>=2.1.0\n", - "!pip install opencv-python-headless pillow numpy\n", - "\n", - "# Install the specific ImageBind fork\n", - "!pip install git+https://github.com/hkchengrex/ImageBind.git" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "LISqDRmE8PpG" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new 
release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip -q install elasticsearch" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "GGIFHatG9BTP" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: python-dotenv in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (1.0.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install python-dotenv" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: openai in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (1.64.0)\n", - "Requirement already satisfied: sniffio in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (1.3.1)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (0.8.2)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (2.10.6)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (4.8.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.11 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (4.12.2)\n", - "Requirement already satisfied: tqdm>4 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (4.67.1)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (0.28.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from openai) (1.9.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", - "Requirement already satisfied: httpcore==1.* in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n", - "Requirement already satisfied: certifi in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (2025.1.31)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in 
/Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai) (2.27.2)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install openai" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: soundfile in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (0.13.1)\n", - "Requirement already satisfied: numpy in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from soundfile) (2.2.3)\n", - "Requirement already satisfied: cffi>=1.0 in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from soundfile) (1.17.1)\n", - "Requirement already satisfied: pycparser in /Users/jessgarson/.pyenv/versions/3.10.12/lib/python3.10/site-packages (from cffi>=1.0->soundfile) (2.22)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install soundfile" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jJt01mAeYaOT" - }, - "source": [ - "## Stage 1 - Collecting Crime Scene Clues\n", - "\n", - "As explained in the blog, the first step is to verify that we have the correct directory structure and that the evidence files are present. We use `files_check.py` for this." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "rZJexfwR4FaT" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All files are correctly organized!\n" - ] - } - ], - "source": [ - "!python stages/01-stage/files_check.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0a1tNsiGYjEZ" - }, - "source": [ - "## Stage 2 - Generating Embeddings with ImageBind\n", - "\n", - "Now we test the embedding generation for an image using ImageBind. 
As the blog explains, ImageBind allows us to generate embeddings for different modalities (image, audio, text) in a shared vector space.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "A6C9IIuA6dlH" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "(1024,)\n" - ] - } - ], - "source": [ - "!python stages/02-stage/test_embedding_generation.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Vw5xlFXgYls4" - }, - "source": [ - "This script generates a 1024-dimensional embedding for a test image, confirming that the ImageBind model is working correctly.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q2dsScL5ZF0X" - }, - "source": [ - "\n", - "## Stage 3 - Storage and Search in Elasticsearch\n", - "\n", - "### Content Indexing\n", - "\n", - "The next step is to index all multimodal evidence in Elasticsearch. This includes images, audio, text, and depth maps as described in the blog." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "3nBsEf7u60bq" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.133s]\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.053s]\n", - "INFO:__main__:\n", - "\n", - "Indexed vision: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"9BkUSZUBvmLH5RQPHhhg\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.033s]\n", - "INFO:__main__:\n", - "\n", - "Indexed vision: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"9RkUSZUBvmLH5RQPIBh2\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.030s]\n", - "INFO:__main__:\n", - "\n", - "Indexed vision: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"9hkUSZUBvmLH5RQPIhi4\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.032s]\n", - "INFO:__main__:\n", - "\n", - "Indexed audio: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"9xkUSZUBvmLH5RQPIxiA\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.033s]\n", - "INFO:__main__:\n", - "\n", - "Indexed text: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"-BkUSZUBvmLH5RQPJBgz\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.031s]\n", - "INFO:__main__:\n", - "\n", - "Indexed text: {\n", - 
" \"result\": \"created\",\n", - " \"_id\": \"-RkUSZUBvmLH5RQPJRgE\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_doc [status:201 duration:0.029s]\n", - "INFO:__main__:\n", - "\n", - "Indexed depth: {\n", - " \"result\": \"created\",\n", - " \"_id\": \"-hkUSZUBvmLH5RQPJRhO\",\n", - " \"_index\": \"multimodal_content\"\n", - "}\n" - ] - } - ], - "source": [ - "!python stages/03-stage/index_all_modalities.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Tf-8U-CGZXxW" - }, - "source": [ - "\n", - "Each piece of evidence is now indexed in Elasticsearch with their respective embeddings, allowing for similarity search.\n", - "\n", - "### Searching by Similarity Across Different Modalities\n", - "\n", - "Now we can test searching for evidence by similarity using different modalities as queries. The blog describes how an input from one modality can retrieve results from all modalities.\n", - "\n", - "#### Search by Audio\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "7f-MBkFALphP" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.183s]\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.101s]\n", - "\n", - "🔎 Similar evidence found:\n", - "\n", - "1. A sinister laugh captured near the crime scene (audio)\n", - " Similarity: 0.9987\n", - " File path: data/audios/joker_laugh.wav\n", - "\n", - "2. A sinister laugh captured near the crime scene (audio)\n", - " Similarity: 0.9987\n", - " File path: data/audios/joker_laugh.wav\n", - "\n", - "3. A sinister laugh captured near the crime scene (audio)\n", - " Similarity: 0.9987\n", - " File path: data/audios/joker_laugh.wav\n", - "\n" - ] - } - ], - "source": [ - "!python stages/03-stage/search_by_audio.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nrGUO1JVZZnz" - }, - "source": [ - "\n", - "This command uses an audio file as a query and retrieves the most similar evidence. In the case of Gotham, this helps identify connections between the audio of a sinister laugh and other evidence.\n", - "\n", - "#### Search by Text" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "mm_RwbfYQBGK" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.091s]\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.168s]\n", - "\n", - "🔎 Similar evidence found:\n", - "\n", - "1. Mysterious note found at the location (text)\n", - " Similarity: 0.7639\n", - " File path: data/texts/riddle.txt\n", - "\n", - "2. 
Mysterious note found at the location (text)\n", - " Similarity: 0.7589\n", - " File path: data/texts/riddle.txt\n", - "\n", - "3. Mysterious note found at the location (text)\n", - " Similarity: 0.7589\n", - " File path: data/texts/riddle.txt\n", - "\n" - ] - } - ], - "source": [ - "!python stages/03-stage/search_by_text.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YXhvE2EbZgQt" - }, - "source": [ - "\n", - "Here we use a text query (\"Why so serious?\") to find related evidence.\n", - "\n", - "#### Search by Image\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "id": "jrOBYZwtQQng" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.152s]\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.081s]\n", - "\n", - "🔎 Similar evidence found:\n", - "\n", - "1. Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - " Similarity: 0.8258\n", - " File path: data/images/crime_scene1.jpg\n", - "\n", - "2. Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - " Similarity: 0.8258\n", - " File path: data/images/crime_scene1.jpg\n", - "\n", - "3. Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - " Similarity: 0.8258\n", - " File path: data/images/crime_scene1.jpg\n", - "\n" - ] - } - ], - "source": [ - "!python stages/03-stage/search_by_image.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V2Ut2whVZm3s" - }, - "source": [ - "This script uses an image from the crime scene to find similar visual evidence.\n", - "\n", - "#### Search by Depth Map\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "Bbm1vWfXQiPZ" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.088s]\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.095s]\n", - "\n", - "🔎 Similar evidence found:\n", - "\n", - "1. Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - " Similarity: 0.5053\n", - " File path: data/images/crime_scene1.jpg\n", - "\n", - "2. Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - " Similarity: 0.5053\n", - " File path: data/images/crime_scene1.jpg\n", - "\n", - "3. 
Photo of the crime scene: A dark, rain-soaked alley is filled with playing cards, while a sinister graffiti of the Joker laughing stands out on the brick wall. (vision)\n", - "   Similarity: 0.5053\n", - "   File path: data/images/crime_scene1.jpg\n", - "\n" - ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dGVterhZUeb7"
+ },
+ "source": [
+ "\n",
+ "\n",
+ "# Multimodal RAG with Elasticsearch: The Gotham City Case\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JGuNiw7hUc6M"
+ },
+ "source": [
+ "This notebook implements the Multimodal RAG (Retrieval-Augmented Generation) pipeline with Elasticsearch as described in the blog. We follow the same structure as the article, with each section explained and implemented in code.\n",
+ "\n",
+ "## Environment Setup\n",
+ "\n",
+ "First, we need to clone the repository that contains the complete project code."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "UM5x0n2iA7o2"
+ },
+ "outputs": [],
+ "source": [
+ "# Clone the repository that contains the project code\n",
+ "!git clone https://github.com/salgado/elasticsearch-labs.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "U5k4YuR8TNZG"
+ },
+ "outputs": [],
+ "source": [
+ "import getpass"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "e6mW8JNyVdzi"
+ },
+ "source": [
+ "Let's navigate to the project directory where the necessary files are located:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PHrDQc0jOOb7"
+ },
+ "outputs": [],
+ "source": [
+ "cd elasticsearch-labs/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LAGB159_Uaxb"
+ },
+ "source": [
+ "Now let's configure the environment variables needed to connect to Elasticsearch and OpenAI. 
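Once the two cells below have run, and the `elasticsearch` client from the Installing Dependencies section is available, a quick `ping()` round-trip is a handy sanity check. A minimal sketch, not part of the project scripts:\n",
+ "\n",
+ "```python\n",
+ "import os\n",
+ "\n",
+ "from elasticsearch import Elasticsearch\n",
+ "\n",
+ "# Hedged sketch: connect with the endpoint and API key entered below\n",
+ "es = Elasticsearch(os.environ[\"ELASTICSEARCH_URL\"], api_key=os.environ[\"ELASTICSEARCH_API_KEY\"])\n",
+ "print(\"Elasticsearch reachable:\", es.ping())  # True when the endpoint and key are valid\n",
+ "```\n",
+ "\n",
+ "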
These variables are needed for indexing and searching content, as well as generating the final report.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "U8IuJRQhS7lz"
+ },
+ "outputs": [],
+ "source": [
+ "ELASTICSEARCH_URL = input(\"Enter the Elasticsearch endpoint url: \")\n",
+ "ELASTICSEARCH_API_KEY = getpass.getpass(\"Enter the Elasticsearch API key: \")\n",
+ "OPENAI_API_KEY = getpass.getpass(\"Enter the OpenAI API key: \")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ZC4v_SHjMwLa"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"ELASTICSEARCH_API_KEY\"] = ELASTICSEARCH_API_KEY\n",
+ "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n",
+ "os.environ[\"ELASTICSEARCH_URL\"] = ELASTICSEARCH_URL"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RNRExs7aVl45"
+ },
+ "source": [
+ "\n",
+ "## Installing Dependencies\n",
+ "\n",
+ "As mentioned in the blog, we need to install the specific dependencies, including the custom ImageBind fork:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "FhPcJYl03eNL"
+ },
+ "outputs": [],
+ "source": [
+ "# Install base dependencies (version specifiers are quoted so the shell does not parse >= as a redirect)\n",
+ "!pip install \"torch>=2.1.0\" \"torchvision>=0.16.0\" \"torchaudio>=2.1.0\"\n",
+ "!pip install opencv-python-headless pillow numpy\n",
+ "\n",
+ "# Install the specific ImageBind fork\n",
+ "!pip install git+https://github.com/hkchengrex/ImageBind.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "LISqDRmE8PpG"
+ },
+ "outputs": [],
+ "source": [
+ "!pip -q install elasticsearch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "GGIFHatG9BTP"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install python-dotenv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "05Sq2ZtHTNZH"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install openai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "LtJvz5t4TNZH"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install soundfile"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jJt01mAeYaOT"
+ },
+ "source": [
+ "## Stage 1 - Collecting Crime Scene Clues\n",
+ "\n",
+ "As explained in the blog, the first step is to verify that we have the correct directory structure and that the evidence files are present. We use `files_check.py` for this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "rZJexfwR4FaT"
+ },
+ "outputs": [],
+ "source": [
+ "!python stages/01-stage/files_check.py"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0a1tNsiGYjEZ"
+ },
+ "source": [
+ "## Stage 2 - Generating Embeddings with ImageBind\n",
+ "\n",
+ "Now we test the embedding generation for an image using ImageBind. 
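Under the hood, the test script amounts to a few lines of ImageBind usage. A minimal sketch, assuming the fork keeps the upstream `imagebind` API and that the evidence image path exists:\n",
+ "\n",
+ "```python\n",
+ "import torch\n",
+ "from imagebind import data\n",
+ "from imagebind.models import imagebind_model\n",
+ "from imagebind.models.imagebind_model import ModalityType\n",
+ "\n",
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+ "model = imagebind_model.imagebind_huge(pretrained=True).eval().to(device)\n",
+ "\n",
+ "# Embed one image from the evidence folder into the shared space\n",
+ "inputs = {ModalityType.VISION: data.load_and_transform_vision_data([\"data/images/crime_scene1.jpg\"], device)}\n",
+ "with torch.no_grad():\n",
+ "    embedding = model(inputs)[ModalityType.VISION][0]\n",
+ "print(embedding.shape)  # expected: torch.Size([1024])\n",
+ "```\n",
+ "\n",
+ "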
As the blog explains, ImageBind allows us to generate embeddings for different modalities (image, audio, text) in a shared vector space.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A6C9IIuA6dlH" + }, + "outputs": [], + "source": [ + "!python stages/02-stage/test_embedding_generation.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vw5xlFXgYls4" + }, + "source": [ + "This script generates a 1024-dimensional embedding for a test image, confirming that the ImageBind model is working correctly.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q2dsScL5ZF0X" + }, + "source": [ + "\n", + "## Stage 3 - Storage and Search in Elasticsearch\n", + "\n", + "### Content Indexing\n", + "\n", + "The next step is to index all multimodal evidence in Elasticsearch. This includes images, audio, text, and depth maps as described in the blog." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3nBsEf7u60bq" + }, + "outputs": [], + "source": [ + "!python stages/03-stage/index_all_modalities.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Tf-8U-CGZXxW" + }, + "source": [ + "\n", + "Each piece of evidence is now indexed in Elasticsearch with their respective embeddings, allowing for similarity search.\n", + "\n", + "### Searching by Similarity Across Different Modalities\n", + "\n", + "Now we can test searching for evidence by similarity using different modalities as queries. The blog describes how an input from one modality can retrieve results from all modalities.\n", + "\n", + "#### Search by Audio\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7f-MBkFALphP" + }, + "outputs": [], + "source": [ + "!python stages/03-stage/search_by_audio.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nrGUO1JVZZnz" + }, + "source": [ + "\n", + "This command uses an audio file as a query and retrieves the most similar evidence. 
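Conceptually, the script embeds the query audio with ImageBind and runs an approximate kNN query over the shared vector space. A hedged sketch, reusing `model`, `data`, and `device` from the Stage 2 sketch and `es` from the setup sketch; the index name appears in the project, while the `embedding` vector field and `description` source field are assumptions:\n",
+ "\n",
+ "```python\n",
+ "audio_inputs = {ModalityType.AUDIO: data.load_and_transform_audio_data([\"data/audios/joker_laugh.wav\"], device)}\n",
+ "with torch.no_grad():\n",
+ "    query_vector = model(audio_inputs)[ModalityType.AUDIO][0].tolist()\n",
+ "\n",
+ "# Approximate kNN over the 1024-dim embeddings indexed in Stage 3\n",
+ "response = es.search(\n",
+ "    index=\"multimodal_content\",\n",
+ "    knn={\"field\": \"embedding\", \"query_vector\": query_vector, \"k\": 3, \"num_candidates\": 10},\n",
+ ")\n",
+ "for hit in response[\"hits\"][\"hits\"]:\n",
+ "    print(hit[\"_score\"], hit[\"_source\"].get(\"description\"))\n",
+ "```\n",
+ "\n",
+ "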
In the case of Gotham, this helps identify connections between the audio of a sinister laugh and other evidence.\n", + "\n", + "#### Search by Text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mm_RwbfYQBGK" + }, + "outputs": [], + "source": [ + "!python stages/03-stage/search_by_text.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YXhvE2EbZgQt" + }, + "source": [ + "\n", + "Here we use a text query (\"Why so serious?\") to find related evidence.\n", + "\n", + "#### Search by Image\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jrOBYZwtQQng" + }, + "outputs": [], + "source": [ + "!python stages/03-stage/search_by_image.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V2Ut2whVZm3s" + }, + "source": [ + "This script uses an image from the crime scene to find similar visual evidence.\n", + "\n", + "#### Search by Depth Map\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bbm1vWfXQiPZ" + }, + "outputs": [], + "source": [ + "!python stages/03-stage/search_by_depth.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DWSzg742ZoQw" + }, + "source": [ + "As explained in the blog, depth maps can provide information about the 3D structure of the scene or objects, complementing the other modalities.\n", + "\n", + "## Stage 4 - Evidence Analysis with LLM\n", + "\n", + "Finally, we bring together all the retrieved evidence and use an LLM (GPT-4) to generate a forensic report that identifies the suspect based on the connections between the different modalities.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A8pmOH31Q2Hc" + }, + "outputs": [], + "source": [ + "!python stages/04-stage/rag_crime_analyze.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VaWriUfjZyUz" + }, + "source": [ + "\n", + "This is the final step of the Multimodal RAG pipeline, where the LLM analyzes the evidence retrieved from Elasticsearch and synthesizes it into a coherent report that identifies the Joker as the main suspect.\n", + "\n", + "## Conclusion\n", + "\n", + "We have thus completed the implementation of the complete Multimodal RAG pipeline with Elasticsearch, following all the steps described in the blog. 
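Stripped of logging, the final step condenses to embed, retrieve, and generate. A schematic recap, with `collect_evidence()` as a hypothetical stand-in for the embedding and kNN retrieval steps above:\n",
+ "\n",
+ "```python\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI()  # reads OPENAI_API_KEY from the environment\n",
+ "\n",
+ "evidence = collect_evidence()  # hypothetical: top hits per modality from Elasticsearch\n",
+ "report = client.chat.completions.create(\n",
+ "    model=\"gpt-4\",\n",
+ "    messages=[\n",
+ "        {\"role\": \"system\", \"content\": \"You are a forensic analyst. Identify the prime suspect.\"},\n",
+ "        {\"role\": \"user\", \"content\": f\"Evidence retrieved from the index: {evidence}\"},\n",
+ "    ],\n",
+ ")\n",
+ "print(report.choices[0].message.content)\n",
+ "```\n",
+ "\n",
+ "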
This pipeline demonstrates how different types of media can be analyzed in an integrated way to provide richer insights and connections between evidence that would be difficult to identify manually.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ji3l3u-bTNZI" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XAj2w5qVTNZI" + }, + "source": [] } - ], - "source": [ - "!python stages/03-stage/search_by_depth.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DWSzg742ZoQw" - }, - "source": [ - "As explained in the blog, depth maps can provide information about the 3D structure of the scene or objects, complementing the other modalities.\n", - "\n", - "## Stage 4 - Evidence Analysis with LLM\n", - "\n", - "Finally, we bring together all the retrieved evidence and use an LLM (GPT-4) to generate a forensic report that identifies the suspect based on the connections between the different modalities.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "A8pmOH31Q2Hc" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:embedding_generator:Testing model with sample input...\n", - "INFO:embedding_generator:🤖 ImageBind model initialized successfully\n", - "INFO:elastic_transport.transport:HEAD https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content [status:200 duration:0.072s]\n", - "INFO:__main__:✅ All components initialized successfully\n", - "INFO:__main__:🔍 Collecting evidence...\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.095s]\n", - "INFO:__main__:✅ Data retrieved for vision: 2 results\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.028s]\n", - "INFO:__main__:✅ Data retrieved for audio: 2 results\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.024s]\n", - "INFO:__main__:✅ Data retrieved for text: 2 results\n", - "INFO:elastic_transport.transport:POST https://getting-started.es.us-east4.gcp.elastic-cloud.com:443/multimodal_content/_search [status:200 duration:0.038s]\n", - "INFO:__main__:✅ Data retrieved for depth: 2 results\n", - "INFO:__main__:\n", - "📝 Generating forensic report...\n", - "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "INFO:llm_analyzer:\n", - "📋 Forensic Report Generated:\n", - "INFO:llm_analyzer:==================================================\n", - "INFO:llm_analyzer:**Prime Suspect:** The Joker\n", - "\n", - "**Evidence Supporting Conclusion:**\n", - "\n", - "- **Visual Evidence:** The photos of the crime scene are highly indicative of the Joker's involvement. The presence of playing cards scattered around a dark, rain-soaked alley is a well-known signature of the Joker, serving as both his calling card and a symbol of his chaotic nature. The graffiti depicting the Joker laughing is a visual taunt and a mark of territory, further implicating him. 
The similarity score of 0.83 for both photos suggests a high degree of confidence in the visual match to known patterns of the Joker's previous crimes.\n", - "\n", - "- **Auditory Evidence:** The sinister laugh captured near the crime scene, with a similarity score of 1.00, is a direct auditory signature of the Joker. This laugh is not only iconic but has been documented in numerous encounters with law enforcement and victims. The perfect match indicates that the laugh is either directly from the Joker or a recording meant to signify his presence or involvement.\n", - "\n", - "- **Textual Evidence:** The mysterious note found at the location, with a similarity score of 0.76, suggests a cryptic message or clue left behind by the criminal. While the content of the note is not detailed, the Joker is known for leaving such notes as part of his psychological games and clues to his next move. The similarity score indicates a strong but not perfect match to known writings of the Joker, possibly due to variations in the message or medium used.\n", - "\n", - "- **Depth Evidence:** The depth sensor capture of the suspect, with a similarity score of 0.77, suggests a figure that matches the physical profile of the Joker. While depth captures can be less definitive in identifying specific facial features, the height, build, and posture can be compared against known data of the Joker.\n", - "\n", - "**Behavioral Patterns:**\n", - "\n", - "The Joker is characterized by his love for chaos, dramatic flair, and leaving symbolic markers at his crime scenes. The combination of playing cards, the sinister laugh, and the cryptic note all align with his known behavioral patterns. His motives often include creating disorder, challenging Batman, and leaving a signature that unmistakably points to him, all of which are evident in this case.\n", - "\n", - "**Confidence Level:** 95%\n", - "\n", - "The evidence strongly points to the Joker as the prime suspect due to the convergence of visual, auditory, textual, and depth data that aligns with his known patterns and behaviors. The slight deductions in confidence are due to the inherent limitations in depth sensor identification and the slightly lower similarity score of the textual evidence.\n", - "\n", - "**Next Steps:** No further evidence required.\n", - "\n", - "The combination of multimodal evidence provides a comprehensive and conclusive identification of the Joker as the perpetrator. However, capturing additional direct visual confirmation of the Joker at the scene or obtaining a confession would only serve to reinforce the already strong case.\n", - "INFO:llm_analyzer:==================================================\n", - "INFO:__main__:✅ Forensic report generated successfully\n", - "INFO:__main__:\n", - "📊 Report Preview:\n", - "INFO:__main__:++++++++++++++++++++++++++++++++++++++++++++++++++\n", - "INFO:__main__:**Prime Suspect:** The Joker\n", - "\n", - "**Evidence Supporting Conclusion:**\n", - "\n", - "- **Visual Evidence:** The photos of the crime scene are highly indicative of the Joker's involvement. The presence of playing cards scattered around a dark, rain-soaked alley is a well-known signature of the Joker, serving as both his calling card and a symbol of his chaotic nature. The graffiti depicting the Joker laughing is a visual taunt and a mark of territory, further implicating him. 
The similarity score of 0.83 for both photos suggests a high degree of confidence in the visual match to known patterns of the Joker's previous crimes.\n", - "\n", - "- **Auditory Evidence:** The sinister laugh captured near the crime scene, with a similarity score of 1.00, is a direct auditory signature of the Joker. This laugh is not only iconic but has been documented in numerous encounters with law enforcement and victims. The perfect match indicates that the laugh is either directly from the Joker or a recording meant to signify his presence or involvement.\n", - "\n", - "- **Textual Evidence:** The mysterious note found at the location, with a similarity score of 0.76, suggests a cryptic message or clue left behind by the criminal. While the content of the note is not detailed, the Joker is known for leaving such notes as part of his psychological games and clues to his next move. The similarity score indicates a strong but not perfect match to known writings of the Joker, possibly due to variations in the message or medium used.\n", - "\n", - "- **Depth Evidence:** The depth sensor capture of the suspect, with a similarity score of 0.77, suggests a figure that matches the physical profile of the Joker. While depth captures can be less definitive in identifying specific facial features, the height, build, and posture can be compared against known data of the Joker.\n", - "\n", - "**Behavioral Patterns:**\n", - "\n", - "The Joker is characterized by his love for chaos, dramatic flair, and leaving symbolic markers at his crime scenes. The combination of playing cards, the sinister laugh, and the cryptic note all align with his known behavioral patterns. His motives often include creating disorder, challenging Batman, and leaving a signature that unmistakably points to him, all of which are evident in this case.\n", - "\n", - "**Confidence Level:** 95%\n", - "\n", - "The evidence strongly points to the Joker as the prime suspect due to the convergence of visual, auditory, textual, and depth data that aligns with his known patterns and behaviors. The slight deductions in confidence are due to the inherent limitations in depth sensor identification and the slightly lower similarity score of the textual evidence.\n", - "\n", - "**Next Steps:** No further evidence required.\n", - "\n", - "The combination of multimodal evidence provides a comprehensive and conclusive identification of the Joker as the perpetrator. 
However, capturing additional direct visual confirmation of the Joker at the scene or obtaining a confession would only serve to reinforce the already strong case.\n", - "INFO:__main__:++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" } - ], - "source": [ - "!python stages/04-stage/rag_crime_analyze.py" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VaWriUfjZyUz" - }, - "source": [ - "\n", - "This is the final step of the Multimodal RAG pipeline, where the LLM analyzes the evidence retrieved from Elasticsearch and synthesizes it into a coherent report that identifies the Joker as the main suspect.\n", - "\n", - "## Conclusion\n", - "\n", - "We have thus completed the implementation of the complete Multimodal RAG pipeline with Elasticsearch, following all the steps described in the blog. This pipeline demonstrates how different types of media can be analyzed in an integrated way to provide richer insights and connections between evidence that would be difficult to identify manually.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/elasticsearch-labs b/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/elasticsearch-labs deleted file mode 160000 index c90ba3d0..00000000 --- a/supporting-blog-content/building-multimodal-rag-with-elasticsearch-gotham/notebook/elasticsearch-labs +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c90ba3d067e2cd639a1babf9cebbf658cedcad9a