-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
67 lines (64 loc) · 3.16 KB
/
Copy pathdocker-compose.yml
File metadata and controls
67 lines (64 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
services:
  # Ollama service - the main LLM backend.
  # It publishes no host ports, so it is reachable only over ollama_network.
  ollama:
    container_name: ollama
    # Image is built from Dockerfile.ollama in the current directory;
    # no_cache forces a full rebuild on every build.
    build:
      context: .
      dockerfile: Dockerfile.ollama
      no_cache: true
    # Run as the named appuser/appgroup account instead of the image default.
    # NOTE(review): presumably UID:GID 1000:1000 - confirm in Dockerfile.ollama.
    user: 'appuser:appgroup'
    # Start through the custom entrypoint script, executed with bash.
    entrypoint:
      - /usr/bin/bash
      - /ollama-entrypoint.sh
    # Restart automatically unless the container was stopped manually.
    restart: unless-stopped
    environment:
      # -1 keeps loaded models resident in memory indefinitely
      # (no idle unload).
      OLLAMA_KEEP_ALIVE: "-1"
      # GPUs visible inside the container; defaults to all.
      NVIDIA_VISIBLE_DEVICES: "${NVIDIA_VISIBLE_DEVICES:-all}"
      # Concurrency limit (default 1) and request queuing (default true).
      # NOTE(review): presumably read by /ollama-entrypoint.sh rather than by
      # Ollama itself - confirm where these two are consumed.
      OLLAMA_CONCURRENT_REQUESTS: "${OLLAMA_CONCURRENT_REQUESTS:-1}"
      OLLAMA_QUEUE_ENABLED: "${OLLAMA_QUEUE_ENABLED:-true}"
      # Context window size in tokens; defaults to 8192.
      OLLAMA_CONTEXT_LENGTH: "${OLLAMA_CONTEXT_LENGTH:-8192}"
    networks:
      - ollama_network
    # Reserve every available NVIDIA GPU for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
# FastAPI wrapper service - API layer in front of Ollama.
  # Host and port are hard-coded in the command (not env-driven) because this
  # service publishes no host ports and is only reachable on ollama_network.
  fastapi-wrapper:
    container_name: authentication-ollama
    # Image is built from Dockerfile.wrapper in the current directory;
    # no_cache forces a full rebuild on every build.
    build:
      context: .
      dockerfile: Dockerfile.wrapper
      no_cache: true
    # Run as the named appuser/appgroup account instead of the image default.
    # NOTE(review): presumably UID:GID 1000:1000 - confirm in Dockerfile.wrapper.
    user: 'appuser:appgroup'
    # Restart automatically unless the container was stopped manually.
    restart: unless-stopped
    # Start the ollama container first. This orders startup only; it does not
    # wait for Ollama to be ready to serve requests.
    depends_on:
      - ollama
    environment:
      # Disable Python output buffering so logs appear immediately.
      PYTHONUNBUFFERED: "1"
      # Optional API key for authentication; empty when not set on the host.
      SESSION_API_KEY: "${SESSION_API_KEY:-}"
    # Serve api_wrapper:app with uvicorn on all interfaces, port 5000,
    # with debug-level logging. Folded into a single line at parse time.
    command: >-
      uvicorn api_wrapper:app
      --host 0.0.0.0
      --port 5000
      --log-level debug
    networks:
      - ollama_network
# Caddy service - the only service here that publishes a port to the host.
  # NOTE(review): presumably reverse-proxies (and terminates HTTPS for) the
  # wrapper; the Caddy configuration is not visible here - confirm.
  caddy:
    container_name: caddy-ollama
    # Image is built from Dockerfile.caddy in the current directory;
    # no_cache forces a full rebuild on every build.
    build:
      context: .
      dockerfile: Dockerfile.caddy
      no_cache: true
    # Run as the named appuser/appgroup account instead of the image default.
    # NOTE(review): presumably UID:GID 1000:1000 - confirm in Dockerfile.caddy.
    user: 'appuser:appgroup'
    # Restart automatically unless the container was stopped manually.
    restart: unless-stopped
    environment:
      # Public access port, passed into the container; defaults to 3334.
      # NOTE(review): presumably read by the Caddy config so it listens on
      # the same port that is published below - confirm.
      PUBLIC_ACCESS_PORT: "${PUBLIC_ACCESS_PORT:-3334}"
    ports:
      # Publish the same port number on host and container, configurable via
      # PUBLIC_ACCESS_PORT. Quoted so YAML always reads the HOST:CONTAINER
      # mapping as a string, as Compose recommends for port mappings.
      - "${PUBLIC_ACCESS_PORT:-3334}:${PUBLIC_ACCESS_PORT:-3334}"
    networks:
      - ollama_network
# Networks
# ollama_network is a user-defined bridge network; the services attached to
# it can reach one another over it.
networks:
  ollama_network:
    driver: bridge