# Source: vllm-proxy/.env.example at main · nearai/vllm-proxy · GitHub
# ============================================================================
# vllm-proxy Environment Variables
# ============================================================================

# ----------------------------------------------------------------------------
# Required Configuration
# ----------------------------------------------------------------------------

# Model name used for key derivation and cache namespacing
# Example: "deepseek-ai/DeepSeek-V3.1" or "openai/gpt-oss-120b"
MODEL_NAME=

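# Authentication token for API access
# Checked against the request's Authorization header, which must be "Bearer <TOKEN>"
# Keep real tokens out of version control; fill this in only in your local .env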
TOKEN=

# ----------------------------------------------------------------------------
# vLLM Backend Configuration
# ----------------------------------------------------------------------------

# Base URL for the vLLM backend service
# Default: "http://vllm:8000"
VLLM_BASE_URL=http://vllm:8000

# Maximum number of concurrent connections to vLLM backend
# Default: 1000
VLLM_PROXY_MAX_CONNECTIONS=1000

# Maximum number of keepalive connections to vLLM backend
# Default: 100
VLLM_PROXY_MAX_KEEPALIVE=100

# ----------------------------------------------------------------------------
# Server Configuration
# ----------------------------------------------------------------------------

# Maximum number of concurrent requests; once this limit is reached, further requests get HTTP 503
# Set to 0 or leave empty to disable the limit
# Default: 0 (unlimited)
VLLM_PROXY_LIMIT_CONCURRENCY=0

# Connection backlog: maximum number of pending connections held in the queue
# Default: 2048
VLLM_PROXY_BACKLOG=2048

# Keep-alive timeout in seconds
# Default: 30
VLLM_PROXY_KEEPALIVE_TIMEOUT=30

# ----------------------------------------------------------------------------
# Redis Cache Configuration
# ----------------------------------------------------------------------------

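# Redis host address
# If not set, Redis caching is disabled and only the local cache is used
# NOTE(review): this conflicts with the stated default below (an unset value
# cannot both disable Redis and default to "localhost") — confirm against the code
# Default: "localhost"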
REDIS_HOST=localhost

# Redis port
# Default: 6379
REDIS_PORT=6379

# Redis password (optional)
# Leave empty if no password is required
REDIS_PASSWORD=

# Redis database number
# Default: 0
REDIS_DB=0

# Chat cache expiration time in seconds
# Default: 1200 (20 minutes)
CHAT_CACHE_EXPIRATION=1200

# ----------------------------------------------------------------------------
# Development & Testing
# ----------------------------------------------------------------------------

# Enable development mode
# When enabled, keys are generated randomly instead of being derived via KMS,
# which allows testing when KMS is unavailable
# Set to "1", "true", or "yes" to enable
# Default: "0" (disabled)
DEV=0

# GPU no-hardware mode
# Set to "1", "true", or "yes" to disable GPU hardware requirements
# When enabled, canned evidence is used so the proxy can be tested without a GPU
# Default: "0" (disabled)
GPU_NO_HW_MODE=0