# docker-compose.yml — 156 lines (146 loc), 3.84 KB
# (GitHub page chrome and line-number gutter removed from scraped copy)
services:
  # PostgreSQL database for benchmarks and deployment outcomes
  postgres:
    # Pinned major version: "latest" can jump a major release, which changes
    # the on-disk data layout and breaks the existing postgres_data volume.
    image: postgres:16
    container_name: neuralnav-postgres
    environment:
      POSTGRES_DB: neuralnav
      POSTGRES_USER: neuralnav
      POSTGRES_PASSWORD: neuralnav_dev_password  # dev-only credential
    volumes:
      # Mount the actual data directory (PGDATA); mounting /var/lib/postgresql
      # alone does not persist the database on postgres <= 17.
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U neuralnav"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - neuralnav-network
    restart: unless-stopped  # consistent with the other long-running services

  # Ollama for LLM-powered intent extraction
  ollama:
    image: ollama/ollama:latest
    container_name: neuralnav-ollama
    volumes:
      - ollama_data:/root/.ollama
    ports:
      - "11434:11434"
    healthcheck:
      # Check if the port is open; the ollama image doesn't have curl installed.
      # NOTE(review): this passes as soon as the server listens, which may be
      # before the `ollama pull` below finishes — confirm the backend tolerates
      # a brief window where the model is not yet available.
      test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/localhost/11434"]
      interval: 30s
      timeout: 10s
      retries: 5
    networks:
      - neuralnav-network
    # Pull the model on startup: serve in the background, give the API a
    # moment to come up, pull the model, then wait on the server process.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        ollama serve &
        sleep 10
        ollama pull qwen2.5:7b
        wait
    restart: unless-stopped  # consistent with the other long-running services

  # Backend API (FastAPI)
  backend:
    platform: linux/amd64
    build:
      context: .
      dockerfile: Dockerfile
    container_name: neuralnav-backend
    environment:
      # Database configuration (matches the postgres service credentials above)
      DATABASE_URL: postgresql://neuralnav:neuralnav_dev_password@postgres:5432/neuralnav
      # Ollama configuration
      OLLAMA_HOST: http://ollama:11434
      OLLAMA_MODEL: qwen2.5:7b
      # API configuration — env values quoted: they are strings, not numbers
      API_HOST: "0.0.0.0"
      API_PORT: "8000"
      # Enable CORS for local development
      CORS_ORIGINS: http://localhost:8501,http://ui:8501
    volumes:
      # Mount source code for development (hot reload)
      - ./src/neuralnav:/app/src/neuralnav
      - ./data:/app/data
      - ./generated_configs:/app/generated_configs
      - ./logs:/app/logs
    ports:
      - "8000:8000"
    depends_on:
      postgres:
        condition: service_healthy
      ollama:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 40s
    networks:
      - neuralnav-network
    restart: unless-stopped

  # UI (Streamlit)
  ui:
    platform: linux/amd64
    build:
      context: .
      dockerfile: ui/Dockerfile
    container_name: neuralnav-ui
    environment:
      # Backend API URL (service-name DNS on the compose network)
      API_BASE_URL: http://backend:8000
      # Streamlit configuration — env values quoted: they are strings
      STREAMLIT_SERVER_PORT: "8501"
      STREAMLIT_SERVER_ADDRESS: "0.0.0.0"
      STREAMLIT_SERVER_HEADLESS: "true"
      STREAMLIT_BROWSER_GATHER_USAGE_STATS: "false"
    volumes:
      # Mount source code for development (hot reload)
      - ./ui:/app/ui
      - ./.streamlit:/app/.streamlit
    ports:
      - "8501:8501"
    depends_on:
      backend:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 40s
    networks:
      - neuralnav-network
    restart: unless-stopped

  # vLLM Simulator (optional - for GPU-free development)
  # Only started when the "simulator" profile is enabled:
  #   docker compose --profile simulator up
  simulator:
    build:
      context: ./simulator
      dockerfile: Dockerfile
    container_name: neuralnav-simulator
    environment:
      MODEL_NAME: meta-llama/Llama-3.1-8B-Instruct
      TENSOR_PARALLEL_SIZE: "1"
      GPU_TYPE: H100
      PORT: "8080"
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - neuralnav-network
    profiles:
      - simulator
    restart: unless-stopped

# Named volumes: persist database and downloaded models across container
# recreation.
volumes:
  postgres_data:
    driver: local
  ollama_data:
    driver: local

# Single bridge network shared by all services (service-name DNS resolution).
networks:
  neuralnav-network:
    driver: bridge