-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy path: docker-compose.amd.yml
More file actions
executable file
·89 lines (85 loc) · 3.29 KB
/
docker-compose.amd.yml
File metadata and controls
executable file
·89 lines (85 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Docker Compose for AMD GPUs (ROCm)
#
# Requirements:
# - AMD GPU with ROCm support:
# - Radeon RX 6000/7000 series (discrete)
# - Radeon Instinct (MI series)
# - Radeon Pro series
# - AMD APUs (Ryzen AI 300 series, Ryzen 8000G)
# - ROCm runtime installed: https://rocm.docs.amd.com/en/latest/deploy/linux/index.html
# - Verify: rocm-smi should work on host (optional, sysfs fallback available)
#
# Quick Start:
# 1. Copy this file to your project root: cp docs/docker-compose.amd.yml docker-compose.yml
# 2. Copy environment template: cp ENV_DEFAULT .env
# 3. Get group IDs and add to .env:
# echo "RENDER_GID=$(getent group render | cut -d: -f3)" >> .env
# echo "VIDEO_GID=$(getent group video | cut -d: -f3)" >> .env
# 4. Run: docker-compose up -d
#
# Verify GPU access:
# docker exec smarterrouter ls /sys/class/drm
# docker logs smarterrouter | grep -i "amd\|gpu"
#
# Troubleshooting:
# - If GPU not detected, check: ls /sys/class/drm/card*/device/mem_info_vram_total
# - Ensure user is in render/video groups: groups $USER
# - For compute workloads, you may need a ROCm base image (see below)
#
# APU (Unified Memory) Setup:
# AMD APUs like Ryzen AI 300 series use unified memory (CPU+GPU share RAM).
# The router auto-detects APUs and uses GTT pool instead of VRAM carve-out.
# - BIOS setting: Set UMA Frame Buffer to MINIMUM (512MB-2GB), NOT maximum
# - If auto-detection fails, set: ROUTER_AMD_UNIFIED_MEMORY_GB=58 (for 64GB RAM)
#
# ROCm Base Image Option:
# For full ROCm compute support, build from ROCm base:
# ```dockerfile
# FROM rocm/pytorch:rocm6.0_ubuntu22.04_py3.9_pytorch_2.0.1
# COPY . /app
# WORKDIR /app
# RUN pip install -r requirements.txt
# CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11436"]
# ```
services:
  smarterrouter:
    image: ghcr.io/peva3/smarterrouter:latest
    container_name: smarterrouter
    restart: unless-stopped
    # Router API port (long syntax; equivalent to the short form "11436:11436").
    ports:
      - target: 11436
        published: 11436
        protocol: tcp
    env_file:
      - .env
    volumes:
      - ./data:/app/data:rw
      - type: tmpfs
        target: /tmp
    networks:
      - smarterrouter-network
    healthcheck:
      # Probe the service's own /health endpoint from inside the container.
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:11436/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    # AMD GPU passthrough: explicit host:container device mappings.
    devices:
      - /dev/kfd:/dev/kfd  # AMD Kernel Fusion Driver (required for ROCm)
      - /dev/dri:/dev/dri  # Direct Rendering Infrastructure (required for GPU access)
    # Non-root GPU access requires membership in the host's render/video groups.
    # Resolve the group IDs on the host and export them via .env:
    #   RENDER_GID=$(getent group render | cut -d: -f3)
    #   VIDEO_GID=$(getent group video | cut -d: -f3)
    group_add:
      - "${RENDER_GID:-109}"  # render group ID (default 109 on Ubuntu)
      - "${VIDEO_GID:-44}"  # video group ID (default 44 on Ubuntu)
    # Optional ROCm tuning (uncomment as needed):
    # environment:
    #   - ROCM_PATH=/opt/rocm
    #   - HIP_VISIBLE_DEVICES=0  # Limit to specific GPU (0, 1, etc.)
    #   - ROUTER_VRAM_MAX_TOTAL_GB=16
    #   - ROUTER_AMD_UNIFIED_MEMORY_GB=58  # For APUs: override unified memory (optional)
# Dedicated user-defined bridge network so the router is isolated from
# the default Docker bridge and reachable by service name.
networks:
  smarterrouter-network:
    driver: bridge  # default driver; explicit for clarity