
Commit dae0727

fix(deploy): improve Docker-Compose and quickstart on Docker (#2037)
* chore: update docker-compose with profiles
* docs: add quick start doc
1 parent 6674b46 commit dae0727

File tree

- .docker/router.yml
- docker-compose.yaml
- fern/docs.yml
- fern/docs/pages/quickstart/quickstart.mdx

4 files changed: +194 −4 lines

.docker/router.yml

+16

@@ -0,0 +1,16 @@
+http:
+  services:
+    ollama:
+      loadBalancer:
+        healthCheck:
+          interval: 5s
+          path: /
+        servers:
+          - url: http://ollama-cpu:11434
+          - url: http://ollama-cuda:11434
+          - url: http://host.docker.internal:11434
+
+  routers:
+    ollama-router:
+      rule: "PathPrefix(`/`)"
+      service: ollama
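
Once the stack is running, the routing in this file can be exercised from the host. A minimal sanity check, assuming the port mappings from the docker-compose.yaml below (Traefik's web entrypoint published on host port 11435, its dashboard on 8081):

```sh
# Reach Ollama through the Traefik entrypoint; /api/tags lists local models,
# so any healthy backend in the load-balancer pool should answer.
curl http://localhost:11435/api/tags

# Inspect the routers and services Traefik loaded from router.yml
# (the API is exposed because the proxy runs with --api.insecure=true).
curl http://localhost:8081/api/rawdata
```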

docker-compose.yaml

+84 −4

@@ -1,19 +1,99 @@
 services:
-  private-gpt:
+
+  #-----------------------------------
+  #---- Private-GPT services ---------
+  #-----------------------------------
+
+  # Private-GPT service for the Ollama CPU and GPU modes
+  # This service builds from an external Dockerfile and runs the Ollama mode.
+  private-gpt-ollama:
     build:
+      context: .
       dockerfile: Dockerfile.external
     volumes:
       - ./local_data/:/home/worker/app/local_data
     ports:
-      - 8001:8001
+      - "8001:8001"
     environment:
       PORT: 8001
       PGPT_PROFILES: docker
       PGPT_MODE: ollama
       PGPT_EMBED_MODE: ollama
+      PGPT_OLLAMA_API_BASE: http://ollama:11434
+    profiles:
+      - ""
+      - ollama
+      - ollama-cuda
+      - ollama-host
+
+  # Private-GPT service for the local mode
+  # This service builds from a local Dockerfile and runs the application in local mode.
+  private-gpt-local:
+    build:
+      context: .
+      dockerfile: Dockerfile.local
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
+    ports:
+      - "8001:8001"
+    environment:
+      PORT: 8001
+      PGPT_PROFILES: local
+      HF_TOKEN: ${HF_TOKEN}
+    profiles:
+      - local
+
+  #-----------------------------------
+  #---- Ollama services --------------
+  #-----------------------------------
+
+  # Traefik reverse proxy for the Ollama service
+  # This will route requests to the Ollama service based on the profile.
   ollama:
-    image: ollama/ollama:latest
+    image: traefik:v2.10
     ports:
-      - 11434:11434
+      - "11435:11434"
+      - "8081:8080"
+    command:
+      - "--providers.file.filename=/etc/router.yml"
+      - "--log.level=ERROR"
+      - "--api.insecure=true"
+      - "--providers.docker=true"
+      - "--providers.docker.exposedbydefault=false"
+      - "--entrypoints.web.address=:11434"
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./.docker/router.yml:/etc/router.yml:ro
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    profiles:
+      - ""
+      - ollama
+      - ollama-cuda
+      - ollama-host
+
+  # Ollama service for the CPU mode
+  ollama-cpu:
+    image: ollama/ollama:latest
+    volumes:
+      - ./models:/root/.ollama
+    profiles:
+      - ""
+      - ollama
+
+  # Ollama service for the CUDA mode
+  ollama-cuda:
+    image: ollama/ollama:latest
     volumes:
       - ./models:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    profiles:
+      - ollama-cuda
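
The empty profile `""` on private-gpt-ollama, ollama, and ollama-cpu is what makes a bare `docker-compose up` start the CPU stack, while the named profiles swap in the CUDA or host variants. A quick way to see which services a given invocation would start (a sketch, assuming a Compose release with profile support):

```sh
# Default (empty) profile: the Ollama CPU stack.
docker-compose config --services

# Named profile: the CUDA variant replaces ollama-cpu with ollama-cuda.
docker-compose --profile ollama-cuda config --services
```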

fern/docs.yml

+9

@@ -10,6 +10,9 @@ tabs:
   overview:
     display-name: Overview
     icon: "fa-solid fa-home"
+  quickstart:
+    display-name: Quickstart
+    icon: "fa-solid fa-rocket"
   installation:
     display-name: Installation
     icon: "fa-solid fa-download"
@@ -32,6 +35,12 @@ navigation:
         contents:
           - page: Introduction
             path: ./docs/pages/overview/welcome.mdx
+  - tab: quickstart
+    layout:
+      - section: Getting started
+        contents:
+          - page: Quickstart
+            path: ./docs/pages/quickstart/quickstart.mdx
   # How to install PrivateGPT, with FAQ and troubleshooting
   - tab: installation
     layout:
fern/docs/pages/quickstart/quickstart.mdx

+85

@@ -0,0 +1,85 @@
This guide provides a quick start for running different profiles of PrivateGPT using Docker Compose.
The profiles cater to various environments, including Ollama setups (CPU, CUDA, macOS) and a fully local setup.

If you want to run PrivateGPT locally without Docker, refer to the [Local Installation Guide](/installation).

#### Prerequisites
- **Docker and Docker Compose:** Ensure both are installed on your system (you can verify both with the snippet after this list).
  [Installation Guide for Docker](https://docs.docker.com/get-docker/), [Installation Guide for Docker Compose](https://docs.docker.com/compose/install/).
- **Clone PrivateGPT Repository:** Clone the PrivateGPT repository to your machine and navigate to the directory:
  ```sh
  git clone https://github.com/zylon-ai/private-gpt.git
  cd private-gpt
  ```
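
A quick way to confirm both prerequisites before continuing (exact version numbers will vary):

```sh
docker --version          # e.g. Docker version 24.x
docker compose version    # or `docker-compose --version` for the standalone binary
```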

---

## Ollama Setups (Recommended)

Ollama setups are recommended for their ease of use and optimized configurations. Ollama offers different profiles depending on your hardware capabilities and operating system.

### 1. Default/Ollama CPU

**Description:**
This profile runs the Ollama service using CPU resources. It is the standard configuration for running Ollama-based Private-GPT services without GPU acceleration.

**Run:**
To start the services, use either of the following commands:
```sh
docker-compose up
```
or
```sh
docker-compose --profile ollama up
```
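
If the model you plan to use is not already present under `./models` (and is not pulled automatically on first request), one way to fetch it is through the running CPU service; a sketch, where `llama2` is only an example model tag:

```sh
# Pull a model into the shared ./models volume via the ollama-cpu service.
docker-compose exec ollama-cpu ollama pull llama2
```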

### 2. Ollama Nvidia CUDA

**Description:**
This profile leverages GPU acceleration with CUDA support, suitable for computationally intensive tasks that benefit from GPU resources.

**Requirements:**
- Ensure that your system has compatible GPU hardware and the necessary NVIDIA drivers installed. The installation process is detailed [here](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).

**Run:**
To start the services with CUDA support, use:
```sh
docker-compose --profile ollama-cuda up
```
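
Before bringing this profile up, it can help to confirm that Docker can see the GPU at all; a minimal check, assuming the NVIDIA Container Toolkit is installed (the CUDA image tag is an arbitrary example):

```sh
# Should print the same device table as running nvidia-smi on the host.
docker run --rm --gpus all nvidia/cuda:12.3.1-base-ubuntu22.04 nvidia-smi
```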

### 3. Ollama Host

**Description:**
This profile is designed for running PrivateGPT with an Ollama instance installed on the host machine. This setup is particularly useful for macOS users, since Docker does not yet support Metal GPU acceleration.

**Requirements:**
- Install Ollama on your machine by following the instructions at [ollama.ai](https://ollama.ai/).
- Start the Ollama service with the command:
  ```sh
  OLLAMA_HOST=0.0.0.0 ollama serve
  ```

**Run:**
To start the services with the host configuration, use:
```sh
docker-compose --profile ollama-host up
```
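
To verify that the host-side Ollama is reachable before starting the stack (a plain sanity check, not part of the official setup):

```sh
# Should return a JSON list of the models available to the host's Ollama.
curl http://localhost:11434/api/tags
```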

---

## Fully Local Setups

### LlamaCPP + HuggingFace Embeddings

**Description:**
This profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models.

**Requirements:**
- **Hugging Face Token (HF_TOKEN):** Required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).

**Run:**
Start the services with your Hugging Face token:
```sh
HF_TOKEN=<your_hf_token> docker-compose --profile local up
```
Replace `<your_hf_token>` with your actual Hugging Face token.
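
Once the services are up, the compose file publishes the application on host port 8001 (for every profile), so you can sanity-check it from the host:

```sh
# The UI should respond here once PrivateGPT has finished loading its models.
curl -I http://localhost:8001
```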
