Adds compatibility with NVIDIA AI Workbench #18


**Status:** Open. Wants to merge 31 commits into base `main`.
- `b3595de` Create test (nv-edwli, Apr 3, 2025)
- `5f331ac` Add files via upload (nv-edwli, Apr 3, 2025)
- `e19e6fb` Delete .project/test (nv-edwli, Apr 3, 2025)
- `6a9830d` Update spec.yaml (nv-edwli, Apr 3, 2025)
- `95791b4` Changed env w/ pip. Added 7 files, Modified 3 files (nv-edwli, Apr 3, 2025)
- `611b656` Added 2 files, Modified .project/spec.yaml (nv-edwli, Apr 3, 2025)
- `d10178d` Modified 2 files (nv-edwli, Apr 3, 2025)
- `9c212ac` Modified 2 files (nv-edwli, Apr 3, 2025)
- `c0e3131` Modified 4 files (nv-edwli, Apr 3, 2025)
- `087801d` Modified 3 files (nv-edwli, Apr 3, 2025)
- `c05d4c0` Added workbench/quickstart.ipynb, Modified 2 files (nv-edwli, Apr 4, 2025)
- `9ac5cfb` Create README.md (nv-edwli, Apr 9, 2025)
- `a144013` Update README.md (nv-edwli, Apr 9, 2025)
- `5651e49` Update README.md (nv-edwli, Apr 9, 2025)
- `1f30fe7` Update compose.yaml (nv-edwli, Apr 10, 2025)
- `fe9516d` Update README.md (nv-edwli, Apr 10, 2025)
- `8f02768` Update README.md (nv-edwli, Apr 10, 2025)
- `eaa08d3` Added workbench/nim_cache/.gitkeep, Modified 3 files (nv-edwli, Apr 16, 2025)
- `1292b65` move device ids for in-a-box (slopp, Apr 17, 2025)
- `ee14f65` Update variables.env (slopp, Apr 17, 2025)
- `f1729f9` Merge pull request #2 from slopp/patch-2 (nv-edwli, Apr 17, 2025)
- `ac11eca` Merge pull request #1 from slopp/patch-1 (nv-edwli, Apr 17, 2025)
- `508bc4c` Modified 2 files (nv-edwli, Apr 21, 2025)
- `82da3ec` Delete data/.gitkeep (nv-edwli, Apr 22, 2025)
- `6a7cc43` Delete deploy/.gitkeep (nv-edwli, Apr 22, 2025)
- `6146cec` Delete docs/.gitkeep (nv-edwli, Apr 22, 2025)
- `7a2e66c` Delete frontend/.gitkeep (nv-edwli, Apr 22, 2025)
- `af6a4d1` Delete notebooks/.gitkeep (nv-edwli, Apr 22, 2025)
- `637d962` Delete src/.gitkeep (nv-edwli, Apr 22, 2025)
- `30a2557` Delete workbench/.gitkeep (nv-edwli, Apr 22, 2025)
- `7205837` Update spec.yaml (nv-edwli, Apr 22, 2025)
64 changes: 63 additions & 1 deletion .gitignore
@@ -19,4 +19,66 @@ uploaded_files/
docs/_*

# Notebook checkpoints
notebooks/.ipynb_checkpoints
notebooks/.ipynb_checkpoints

# Ignore generated or temporary files managed by the Workbench
.project/*
!.project/spec.yaml
!.project/configpacks

# General ignores
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Temp directories, notebooks created by jupyterlab
.ipynb_checkpoints
.Trash-*/
.jupyter/
.local/

# Python distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Workbench Project Layout
workbench/data/*
workbench/volumes/*
volumes/*
workbench/nim_cache/huggingface/
workbench/nim_cache/local_cache/
workbench/nim_cache/ngc/
11 changes: 11 additions & 0 deletions .project/configpacks
@@ -0,0 +1,11 @@
*defaults.ContainerUser
*bash.PreBuild
*defaults.EnvVars
*defaults.Readme
*defaults.CA
*defaults.Entrypoint
*apt.PackageManager
*bash.PreLanguage
*python.PipPackageManager
*bash.PostBuild
*jupyterlab.JupyterLab
91 changes: 91 additions & 0 deletions .project/spec.yaml
@@ -0,0 +1,91 @@
specVersion: v2
specMinorVersion: 2
meta:
name: rag
image: project-rag
description: ""
labels: []
createdOn: "2024-12-06T19:19:11Z"
defaultBranch: main
layout: []
environment:
base:
registry: nvcr.io
image: nvidia/ai-workbench/python-basic:1.0.2
build_timestamp: "20241001182612"
name: Python Basic
supported_architectures: []
cuda_version: ""
description: A Python Base with Jupyterlab
entrypoint_script: ""
labels:
- ubuntu
- python3
- jupyterlab
apps:
- name: jupyterlab
type: jupyterlab
class: webapp
start_command: jupyter lab --allow-root --port 8888 --ip 0.0.0.0 --no-browser --NotebookApp.base_url=\$PROXY_PREFIX --NotebookApp.default_url=/lab --NotebookApp.allow_origin='*'
health_check_command: '[ \$(echo url=\$(jupyter lab list | head -n 2 | tail -n 1 | cut -f1 -d'' '' | grep -v ''Currently'' | sed "s@/?@/lab?@g") | curl -o /dev/null -s -w ''%{http_code}'' --config -) == ''200'' ]'
stop_command: jupyter lab stop 8888
user_msg: ""
logfile_path: ""
timeout_seconds: 60
icon_url: ""
webapp_options:
autolaunch: true
port: "8888"
proxy:
trim_prefix: false
url_command: jupyter lab list | head -n 2 | tail -n 1 | cut -f1 -d' ' | grep -v 'Currently'
programming_languages:
- python3
icon_url: https://workbench.download.nvidia.com/static/img/ai-workbench-icon-rectangle.jpg
image_version: 1.0.5
os: linux
os_distro: ubuntu
os_distro_release: "22.04"
schema_version: v2
user_info:
uid: ""
gid: ""
username: ""
package_managers:
- name: apt
binary_path: /usr/bin/apt
installed_packages:
- curl
- git
- git-lfs
- python3
- gcc
- python3-dev
- python3-pip
- vim
- name: pip
binary_path: /usr/bin/pip
installed_packages:
- jupyterlab==4.2.4
package_manager_environment:
name: ""
target: ""
compose_file_path: workbench/compose.yaml
execution:
apps: []
resources:
gpu:
requested: 0
sharedMemoryMB: 0
secrets:
- variable: NVIDIA_API_KEY
description: ""
mounts:
- type: project
target: /project/
description: Project directory
options: rw
- type: volume
target: /nvwb-shared-volume/
description: ""
options: volumeName=nvwb-shared-volume
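The `spec.yaml` above wires up JupyterLab with a start command, a stop command, and a health check that curls the lab URL and passes only on an HTTP 200. As a rough illustration of that probe pattern (not Workbench's actual implementation, and `is_healthy` is our own name), the same check can be sketched in stdlib Python:

```python
import http.server
import threading
import urllib.request


def is_healthy(url: str, expected: int = 200) -> bool:
    """Return True if the app answers with the expected HTTP status,
    loosely mirroring the spec.yaml curl-based '%{http_code}' == 200 check."""
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            return resp.status == expected
    except OSError:
        # Connection refused, DNS failure, or timeout all count as unhealthy.
        return False


if __name__ == "__main__":
    # Stand-in server so the probe has something to hit; in Workbench the
    # target would be the JupyterLab URL reported by `jupyter lab list`.
    server = http.server.HTTPServer(
        ("127.0.0.1", 0), http.server.SimpleHTTPRequestHandler
    )
    threading.Thread(target=server.serve_forever, daemon=True).start()
    port = server.server_address[1]
    print(is_healthy(f"http://127.0.0.1:{port}/"))
    server.shutdown()
```

Workbench repeats this kind of probe until it succeeds or the configured `timeout_seconds` (60 here) elapses.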
4 changes: 4 additions & 0 deletions README.md
@@ -28,6 +28,10 @@ This blueprint demonstrates how to set up a RAG solution that uses NVIDIA NIM an
By default, this blueprint leverages locally-deployed NVIDIA NIM microservices to meet specific data governance and latency requirements.
However, you can replace these models with your NVIDIA-hosted models available in the [NVIDIA API Catalog](https://build.nvidia.com).

| :exclamation: Important |
| :-----------------------|
| Users running this blueprint with [NVIDIA AI Workbench](https://www.nvidia.com/en-us/deep-learning-ai/solutions/data-science/workbench/) should skip to the quickstart section [here](workbench/README.md#get-started)! |

## Key Features
- Multimodal data extraction support with text, tables, charts, and infographics
- Hybrid search with dense and sparse search
4 changes: 2 additions & 2 deletions notebooks/ingestion_api_usage.ipynb
@@ -58,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"IPADDRESS = \"localhost\" #Replace this with the correct IP address\n",
"IPADDRESS = \"ingestor-server\" if \"AI_WORKBENCH\" in os.environ else \"localhost\" #Replace this with the correct IP address\n",
"INGESTOR_SERVER_PORT = \"8082\"\n",
"BASE_URL = f\"http://{IPADDRESS}:{INGESTOR_SERVER_PORT}\" # Replace with your server URL\n",
"\n",
@@ -330,7 +330,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
4 changes: 2 additions & 2 deletions notebooks/retriever_api_usage.ipynb
@@ -52,7 +52,7 @@
"metadata": {},
"outputs": [],
"source": [
"IPADDRESS = \"localhost\" #Replace this with the correct IP address\n",
"IPADDRESS = \"rag-server\" if \"AI_WORKBENCH\" in os.environ else \"localhost\" #Replace this with the correct IP address\n",
"RAG_SERVER_PORT = \"8081\"\n",
"BASE_URL = f\"http://{IPADDRESS}:{RAG_SERVER_PORT}\" # Replace with your server URL\n",
"\n",
@@ -221,7 +221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
"version": "3.10.12"
}
},
"nbformat": 4,
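Both notebook diffs above apply the same change: the target host switches on the `AI_WORKBENCH` environment flag, so the identical code reaches services by compose service name inside Workbench and falls back to `localhost` outside it. A standalone sketch of that pattern (the `resolve_host` helper is our own name, not part of the blueprint):

```python
import os


def resolve_host(service_name: str, fallback: str = "localhost") -> str:
    # Inside AI Workbench, the compose services share a network, so other
    # containers are reachable by service name; outside it, fall back to
    # localhost and the published ports.
    return service_name if "AI_WORKBENCH" in os.environ else fallback


RAG_SERVER_PORT = "8081"
BASE_URL = f"http://{resolve_host('rag-server')}:{RAG_SERVER_PORT}"
```

With `AI_WORKBENCH=true` set (as `variables.env` does), `BASE_URL` becomes `http://rag-server:8081`; otherwise it stays `http://localhost:8081`.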
1 change: 1 addition & 0 deletions requirements.txt
@@ -22,3 +22,4 @@ opentelemetry-exporter-prometheus==0.50b0
opentelemetry-instrumentation-milvus==0.36.0
opentelemetry-instrumentation-fastapi==0.50b0
opentelemetry-processor-baggage==0.50b0
jupyterlab>3.0
**Collaborator:** why was this needed?

**Author (nv-edwli):** Every project in AI Workbench is source code that is containerized automatically: a base container with project-level customizations (packages, variables, and so on) layered on top. The main project container for this blueprint provides a JupyterLab IDE by default, so users can develop, write code, and customize the blueprint in the project environment.


21 changes: 21 additions & 0 deletions variables.env
@@ -0,0 +1,21 @@
# Flag for AI Workbench env
**Collaborator:** Can we keep this file inside the workbench directory?

**Author (nv-edwli, Apr 22, 2025):**
AI Workbench expects environment setup files at the top level of the repo and metadata files under the top level .project hidden folder.

Env setup files include requirements.txt, variables.env, apt.txt, preBuild.bash, postBuild.bash, etc. More details here.

In this case, we only need the first two files; the others are not needed and therefore not included in the repo.

This file does not affect the flow for users running the BP outside of AIWB as they would still run the flow via the main README instructions.

AI_WORKBENCH=true

# Default docker volumes
DATASET_ROOT=ingest
DOCKER_VOLUME_DIRECTORY=vectordb

### Uncomment to override Compose with BUILD ENDPOINTS ###
#APP_EMBEDDINGS_SERVERURL=
#APP_LLM_SERVERURL=
#APP_RANKING_SERVERURL=
#EMBEDDING_NIM_ENDPOINT=https://integrate.api.nvidia.com/v1
#PADDLE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/baidu/paddleocr
#PADDLE_INFER_PROTOCOL=http
#YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2
#YOLOX_INFER_PROTOCOL=http
#YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1
#YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http
#YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1
#YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http
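`variables.env` is a plain `KEY=VALUE` file in which `#` lines are comments, which is why the Local Hosting instructions below can toggle the Build-endpoint overrides simply by commenting them out. A minimal stdlib sketch of how such a file resolves (`parse_env` is our own helper, not a Workbench API):

```python
def parse_env(text: str) -> dict[str, str]:
    """Parse dotenv-style KEY=VALUE lines, skipping blanks and # comments."""
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # commented-out overrides are simply ignored
        key, sep, value = line.partition("=")
        if sep:
            env[key.strip()] = value.strip()
    return env


sample = """\
# Flag for AI Workbench env
AI_WORKBENCH=true
DATASET_ROOT=ingest
#APP_LLM_SERVERURL=
"""
print(parse_env(sample))  # {'AI_WORKBENCH': 'true', 'DATASET_ROOT': 'ingest'}
```

Uncommenting `#APP_LLM_SERVERURL=` in the sample would bring that key back into the resulting environment, which is exactly the switch the Local Hosting flow relies on.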

74 changes: 74 additions & 0 deletions workbench/README.md
@@ -0,0 +1,74 @@
# Quickstart for NVIDIA AI Workbench

This blueprint is for developers who want a quick start on setting up a RAG solution with a path to production using NVIDIA NIM.

> **Note**
> This app runs in [NVIDIA AI Workbench](https://docs.nvidia.com/ai-workbench/user-guide/latest/overview/introduction.html). It's a free, lightweight developer platform that you can run on your own systems to get up and running with complex AI applications and workloads in a short amount of time.

> You may want to [**fork**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo#forking-a-repository) this repository into your own account before proceeding. Otherwise, you won't be able to push any changes you make, because this NVIDIA-owned repository is **read-only**.

*Navigating the README*: [Project Overview](#project-overview) | [Get Started](#get-started) | [License](#license)

*Other Resources*: [:arrow_down: Download AI Workbench](https://www.nvidia.com/en-us/deep-learning-ai/solutions/data-science/workbench/) | [:book: User Guide](https://docs.nvidia.com/ai-workbench/) | [:open_file_folder: Other Projects](https://docs.nvidia.com/ai-workbench/user-guide/latest/quickstart/example-projects.html) | :rotating_light: User Forum (Coming Soon)

## Project Overview

This blueprint serves as a reference solution for a foundational Retrieval Augmented Generation (RAG) pipeline. One of the key use cases in Generative AI is enabling users to ask questions and receive answers based on their enterprise data corpus. This blueprint demonstrates how to set up a RAG solution that uses NVIDIA NIM and GPU-accelerated components. By default, this blueprint leverages locally-deployed NVIDIA NIM microservices to meet specific data governance and latency requirements. However, you can replace these models with NVIDIA-hosted models available in the [NVIDIA API Catalog](https://build.nvidia.com).

[Read More](../README.md#software-components)

## Get Started

Ensure you have satisfied the prerequisites for this Blueprint ([details](../README.md#hardware-requirements)).

### Use Build Endpoints

1. Open NVIDIA AI Workbench. Select a **Location** to work in.

1. **Clone** the project with URL: https://github.com/NVIDIA-AI-Blueprints/rag

1. On the **Project Dashboard**, resolve the yellow unconfigured secrets warning by inputting your ``NVIDIA_API_KEY``.

1. Select ``ingest``, ``rag``, and ``vectordb`` compose profiles from the dropdown under the **Compose** section.

1. Select **Start**. The compose services may take several minutes to pull and build.

1. When the compose services are ready, select **View Compose Settings**.

1. Locate the ``rag-playground`` service and select the **Open in Browser** icon.

* Alternatively, you can access the frontend at the host IP address, e.g. ``http://<ip_addr>:8090``.

1. You can now interact with the RAG Chatbot through its browser interface.

### Local Hosting

1. Open NVIDIA AI Workbench. Select a **Location** to work in.

1. **Clone** the project with URL: https://github.com/NVIDIA-AI-Blueprints/rag

1. On the **Project Dashboard**, resolve the yellow unconfigured secrets warning by inputting your ``NVIDIA_API_KEY``.

1. On the **File Browser**, locate the ``variables.env`` file.

1. From the hamburger menu, select **Edit**. Comment out the overriding variables for Build endpoints.

1. Select ``ingest``, ``rag``, ``vectordb``, and ``local`` compose profiles from the dropdown under the **Compose** section.

1. Select **Start**. The compose services may take several minutes to pull and build.

1. When the compose services are ready, select **View Compose Settings**.

1. Locate the ``rag-playground`` service and select the **Open in Browser** icon.

* Alternatively, you can access the frontend at the host IP address, e.g. ``http://<ip_addr>:8090``.

1. You can now interact with the RAG Chatbot through its browser interface.

## License

This NVIDIA AI Blueprint is licensed under the [Apache License, Version 2.0](../LICENSE). This project will download and install additional third-party open source software projects and containers. Review [the license terms of these open source projects](../LICENSE-3rd-party.txt) before use.

The software and materials are governed by the NVIDIA Software License Agreement (found at https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/) and the Product-Specific Terms for NVIDIA AI Products (found at https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/), except that models are governed by the AI Foundation Models Community License Agreement (NVIDIA Agreements | Enterprise Software | NVIDIA Community Model License) and the NVIDIA dataset is governed by the NVIDIA Asset License Agreement found [here](../data/LICENSE.DATA).

For the Meta/llama-3.1-70b-instruct model, the Llama 3.1 Community License Agreement applies; for the nvidia/llama-3.2-nv-embedqa-1b-v2 model, the Llama 3.2 Community License Agreement applies; and for the nvidia/llama-3.2-nv-rerankqa-1b-v2 model, the Llama 3.2 Community License Agreement applies. Built with Llama.