-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathnginx.conf
More file actions
134 lines (111 loc) · 3.9 KB
/
Copy pathnginx.conf
File metadata and controls
134 lines (111 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Nginx configuration for Hailo Ollama Gateway
#
# This proxies requests to the FastAPI gateway service which translates
# Ollama-compatible REST calls to Hailo RPC calls.
#
# Usage:
# 1. Start the gateway: python hailo_ollama_gateway.py
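# 2. Include this file from inside the http { } block of your main nginx
#    configuration: include /path/to/nginx.conf;
# 3. Or copy both the upstream block and the server block into the http { }
#    block of your nginx.conf (the server block refers to the upstream by name)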
# Backend pool: the gateway process listening on the loopback interface.
upstream hailo_gateway {
    # Single backend on the local machine, port 11434.
    server 127.0.0.1:11434;

    # Keep up to 32 idle connections to the backend open per worker process.
    # Reuse only happens for locations that proxy with HTTP/1.1 and send an
    # empty Connection header.
    keepalive 32;
}
# Plain-HTTP virtual host that forwards Ollama-style API traffic to the
# hailo_gateway upstream. Every proxied location uses HTTP/1.1 with an empty
# Connection header so the upstream keepalive pool is actually reused, and
# forwards the same set of client-identifying headers.
#
# NOTE: proxy_set_header directives are inherited from the enclosing level only
# when a location defines none of its own, so each location below repeats the
# full header set on purpose.
server {
    listen 80;
    listen [::]:80;
    server_name localhost hailo.local;

    # Optional: Redirect HTTP to HTTPS
    # return 301 https://$host$request_uri;

    # Large request bodies for model uploads
    client_max_body_size 10G;

    # Default timeouts for long-running inference.
    # The connect timeout is capped at 75s: per the nginx documentation this
    # timeout cannot usually exceed 75 seconds, so a larger value is misleading.
    proxy_connect_timeout 75s;
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;

    # Health check (exact match on the root path only)
    location = / {
        proxy_pass http://hailo_gateway/;
        proxy_http_version 1.1;

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Empty Connection header is required for upstream keepalive reuse.
        proxy_set_header Connection '';
    }

    # Ollama API endpoints (prefix match; the exact-match locations below take
    # precedence for /api/generate, /api/chat and /api/pull)
    location /api/ {
        proxy_pass http://hailo_gateway/api/;
        proxy_http_version 1.1;

        # Required headers
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Streaming support (Server-Sent Events / NDJSON): pass response data
        # to the client as soon as it arrives instead of buffering it.
        proxy_set_header Connection '';
        proxy_buffering off;
        proxy_cache off;
        chunked_transfer_encoding on;

        # Stream the request body to the gateway as it is received rather than
        # reading the whole body first (relevant for large uploads).
        proxy_request_buffering off;
    }

    # Generate endpoint - streaming inference
    location = /api/generate {
        proxy_pass http://hailo_gateway/api/generate;
        proxy_http_version 1.1;

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Critical for streaming
        proxy_buffering off;
        proxy_cache off;
        proxy_set_header Connection '';
        chunked_transfer_encoding on;

        # Extended timeout for long generations
        proxy_read_timeout 600s;
    }

    # Chat endpoint - streaming inference
    location = /api/chat {
        proxy_pass http://hailo_gateway/api/chat;
        proxy_http_version 1.1;

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Critical for streaming
        proxy_buffering off;
        proxy_cache off;
        proxy_set_header Connection '';
        chunked_transfer_encoding on;

        # Extended timeout for long generations
        proxy_read_timeout 600s;
    }

    # Model pull endpoint - can take a while
    location = /api/pull {
        proxy_pass http://hailo_gateway/api/pull;
        proxy_http_version 1.1;

        # Same forwarding headers as the other API locations.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Empty Connection header is required for upstream keepalive reuse.
        proxy_set_header Connection '';

        proxy_buffering off;
        proxy_cache off;
        proxy_read_timeout 3600s;  # Model loading can take time
    }

    # Error pages: gateway failures are answered with a static 503 page.
    error_page 502 503 504 /50x.html;
    location = /50x.html {
        # Only reachable through the error_page redirect above; a client
        # requesting /50x.html directly gets 404 instead of a bogus 503.
        internal;
        default_type text/html;
        return 503 '<!DOCTYPE html><html><body><h1>Hailo Gateway Unavailable</h1><p>The Hailo Ollama Gateway service is not running.</p></body></html>';
    }
}
# HTTPS server (optional - uncomment and configure if needed)
# server {
# listen 443 ssl http2;
# listen [::]:443 ssl http2;
# server_name localhost hailo.local;
#
# ssl_certificate /path/to/cert.pem;
# ssl_certificate_key /path/to/key.pem;
#
# # Copy location blocks from above...
# }