# docker-compose.yml
services:
  # Frontend service: Vite dev server started through bunx.
  frontend:
    build:
      context: .
      target: dev  # Dockerfile build stage to stop at
    ports:
      - "5173:5173"
    environment:
      - BUN_ENV=development
      # API base URL exposed to the client bundle (the backend service
      # publishes port 3000 on the host).
      - VITE_API_URL=http://localhost:3000
    # --host 0.0.0.0 makes Vite listen on all interfaces so the published
    # port is reachable from outside the container.
    command: ["bunx", "vite", "--host", "0.0.0.0"]
    develop:
      watch:
        # Source, assets and config are synced into the running container.
        - action: sync
          path: ./src/client
          target: /app/src/client
        - action: sync
          path: ./src/assets
          target: /app/src/assets
        - action: sync
          path: ./public
          target: /app/public
        - action: sync
          path: ./index.html
          target: /app/index.html
        - action: sync
          path: ./vite.config.ts
          target: /app/vite.config.ts
        # Dependency changes require a fresh image build.
        - action: rebuild
          path: package.json
    networks:
      - app-network
# --- AI Model Service (Ollama) ---
  ollama:
    # Official Ollama image; it includes the CUDA libraries needed for GPU
    # support. The tag is pinned because pulling the latest version can
    # change how certain models are run.
    # A lightweight but unofficial CPU-only alternative (around 70 MB) is
    # alpine/ollama (https://hub.docker.com/r/alpine/ollama).
    image: ollama/ollama:0.16.3
    container_name: ollama-shared  # rename as needed
    ports:
      - "11434:11434"
    volumes:
      # Bind mount of a shared models folder on the host. Set
      # OLLAMA_MODELS_PATH to override the machine-specific fallback path.
      - ${OLLAMA_MODELS_PATH:-/home/cynth/ollama_models_shared}:/root/.ollama/models
      # Alternative: a named volume that persists downloaded models.
      # - ollama_models:/root/.ollama
    init: true  # handles zombie processes, recommended for containers
    # GPU access via device reservation; this is the current Compose syntax
    # and replaces the legacy `runtime: nvidia` setting.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all  # reserve every available GPU
              capabilities: [gpu]
    environment:
      - OLLAMA_HOST=0.0.0.0  # listen on all interfaces
      # NOTE(review): Ollama normally takes its port from OLLAMA_HOST;
      # confirm something actually reads OLLAMA_PORT.
      - OLLAMA_PORT=11434
      - OLLAMA_NUM_PARALLEL=4  # parallel requests a model can handle
      - OLLAMA_MAX_LOADED_MODELS=2  # max models kept in memory at once
      - OLLAMA_KEEP_ALIVE=-1  # keep models loaded forever (-1 = infinite)
      - OLLAMA_DEBUG=0
      - OLLAMA_CONTEXT_LENGTH=8192  # context window in tokens (affects VRAM)
      - NVIDIA_VISIBLE_DEVICES=all  # which GPUs to expose to the container
      # Required GPU capabilities (ensures the necessary CUDA drivers are
      # loaded inside the container).
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "ollama", "list"]  # checks that Ollama is responding
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s  # gives Ollama time to start up
    networks:
      - app-network
# --- Backend Service ---
  backend:
    build:
      context: .
      target: dev  # Dockerfile build stage to stop at
    ports:
      - "3000:3000"
    volumes:
      # NOTE(review): this bind mount overlaps the ./src/server watch rule
      # below; confirm both are intended.
      - ./src/server/aiTest:/app/src/server/aiTest
    environment:
      # NOTE: database credentials are hard-coded for local development and
      # must match the values configured on the db service.
      - DATABASE_URL=postgresql://root:root@db:5432/test_db
      - NODE_ENV=development
      - PORT=3000
      - DB_HOST=db
      - DB_PORT=5432
      - DB_NAME=test_db
      - DB_USER=root
      - DB_PASSWORD=root
      # Ollama OpenAI-compatible chat endpoint.
      - MODEL_URL=${MODEL_URL:-http://ollama:11434/v1/chat/completions}
      # Model for text-to-SQL tasks.
      - TEXT2SQL_MODEL=${TEXT2SQL_MODEL:-arctic-text2sql:latest}
      # Alternative text-to-SQL model option:
      # - TEXT2SQL_MODEL=${TEXT2SQL_MODEL:-distil-qwen3-4b:latest}
      # Main AI response model. AI_RESPONSE_MODEL is read first so the host
      # variable matches the container variable, like the other model
      # settings; the older AIRESP_MODEL name is still honoured as fallback.
      - AI_RESPONSE_MODEL=${AI_RESPONSE_MODEL:-${AIRESP_MODEL:-qwen2.5-coder:7b}}
      # Model for evaluating/validating responses.
      - JUDGE_MODEL=${JUDGE_MODEL:-qwen2.5-coder:7b}
      # Larger judge model option (needs more VRAM):
      # - JUDGE_MODEL=${JUDGE_MODEL:-qwen2.5-coder:14b}
      - MODEL_REGISTRY_URL=http://ollama:11434/api/tags
      - HARDWARE_TIER=4090-workstation
    command: ["bun", "run", "server"]
    develop:
      watch:
        # Server and shared code changes are synced, then the container is
        # restarted so the server picks them up.
        - action: sync+restart
          path: ./src/server
          target: /app/src/server
          ignore:
            - "**/*.log"
            - "**/*.tmp"
        - action: sync+restart
          path: ./src/shared
          target: /app/src/shared
        # Dependency changes require a fresh image build.
        - action: rebuild
          path: package.json
    depends_on:
      db:
        # service_started only waits for the container to start, not for
        # PostgreSQL to accept connections.
        condition: service_started
      ollama:
        # Waits until the ollama healthcheck passes.
        condition: service_healthy
    networks:
      - app-network
# --- PostgreSQL Database ---
  db:
    image: postgres:16-alpine
    restart: always
    shm_size: 128mb
    environment:
      POSTGRES_USER: root
      # NOTE: hard-coded credentials are for local development only and
      # should be replaced (they are not secure).
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db
    ports:
      - "5432:5432"
    # Reports the container healthy once PostgreSQL accepts connections for
    # the configured user and database.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U root -d test_db"]
      interval: 10s
      timeout: 5s
      retries: 5
    volumes:
      # Named volume that persists the database files.
      - postgres_offline_data:/var/lib/postgresql/data
    networks:
      - app-network
# --- pgAdmin GUI ---
  pgadmin:
    container_name: pgadmin4_container
    # NOTE(review): no tag is given, so this resolves to the latest image;
    # consider pinning a version.
    image: dpage/pgadmin4
    restart: always
    environment:
      # Login for the pgAdmin web UI (local development only).
      PGADMIN_DEFAULT_EMAIL: admin@admin.com
      PGADMIN_DEFAULT_PASSWORD: root
    ports:
      - "5050:80"  # web UI published on host port 5050
    depends_on:
      - db
    volumes:
      # Named volume that persists pgAdmin data.
      - pgadmin_offline_data:/var/lib/pgadmin
    networks:
      - app-network
# --- Networks ---
# Single bridge network joined by every service in this file.
networks:
  app-network:
    driver: bridge
# --- Volumes ---
# Named volumes with default settings, mounted by the db and pgadmin services.
volumes:
  postgres_offline_data:
  pgadmin_offline_data: