-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
44 lines (43 loc) · 1.19 KB
/
docker-compose.yaml
File metadata and controls
44 lines (43 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# dInfer Docker Compose example
#
# Prerequisites:
# - NVIDIA Container Toolkit installed
# - Docker Compose v2.x with GPU support
#
# Example usage:
#
# - Run benchmark with sample prompt:
# docker compose run --rm dinfer python benchmarks/benchmark.py \
# --model_name /models/LLaDA-MoE-7B-A1B-Instruct --model_type llada_moe \
# --gpu 0,1,2,3 --use_tp
#
# - Convert a MoE model to FusedMoE format:
# docker compose run --rm dinfer python -m tools.transfer \
# --input /models/LLaDA-MoE-7B-A1B-Instruct \
# --output /models/LLaDA-MoE-7B-A1B-Instruct-fused
#
# - Interactive Python session:
# docker compose run --rm dinfer python
services:
  # Single service: the dInfer runtime container, built straight from GitHub.
  dinfer:
    hostname: dinfer
    container_name: dinfer
    image: dinfer:latest
    build:
      # Quoted so the '#master' branch fragment can never be misread as a
      # YAML comment, even after future edits introduce whitespace.
      context: 'https://github.com/sammcj/dInfer.git#master'
      dockerfile: Dockerfile
    # Keep STDIN open and allocate a TTY so `docker compose run` gives an
    # interactive session (see usage examples above).
    stdin_open: true
    tty: true
    # Share the host IPC namespace — commonly required for multi-GPU
    # tensor-parallel workloads that use shared memory.
    ipc: host
    environment:
      - HF_HUB_ENABLE_HF_TRANSFER=1
      - TOKENIZERS_PARALLELISM=false
    volumes:
      # Host ./models is mounted at /models inside the container; model
      # paths in the example commands point here.
      - ./models:/models
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]