ollama_gateway/nginx.conf at main · hackdefendr/ollama_gateway · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Nginx configuration for Hailo Ollama Gateway
#
# This proxies requests to the FastAPI gateway service which translates
# Ollama-compatible REST calls to Hailo RPC calls.
#
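# Usage:
#   1. Start the gateway: python hailo_ollama_gateway.py
#   2. Include this config inside the http { } block of your main nginx
#      configuration: include /path/to/nginx.conf;
#   3. Or copy both the upstream block and the server block into the
#      http { } block of your nginx.conf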

# Upstream group for the FastAPI gateway process; every proxy_pass in this
# file targets it by name.
upstream hailo_gateway {
    # Gateway address: loopback interface only, port 11434.
    server 127.0.0.1:11434;
    # Keep up to 32 idle connections to the gateway open per worker process
    # for reuse. Reuse only happens in locations that set
    # "proxy_http_version 1.1" and send an empty Connection header.
    keepalive 32;
}

server {
    # Accept plain HTTP on port 80 over both IPv4 and IPv6.
    listen 80;
    listen [::]:80;
    server_name localhost hailo.local;

    # Optional: Redirect HTTP to HTTPS
    # return 301 https://$host$request_uri;

    # Large request bodies for model uploads (applies to every location below)
    client_max_body_size 10G;

    # Default proxy timeouts for long-running inference. Locations below
    # inherit these unless they set their own value.
    # NOTE(review): the nginx documentation states that the connect timeout
    # usually cannot exceed 75 seconds, so 300s may not take full effect --
    # confirm whether a shorter value is intended.
    proxy_connect_timeout 300s;
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;

    # Health check: forward only the exact root path to the gateway
    location = / {
        proxy_pass http://hailo_gateway/;
        proxy_http_version 1.1;

        # Pass the original host and client details to the gateway
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Send no Connection header upstream. Without this nginx sends
        # "Connection: close" and the keepalive pool of the hailo_gateway
        # upstream is never used for these requests.
        proxy_set_header Connection '';
    }

    # Catch-all for Ollama API paths under /api/ that have no exact-match
    # location of their own
    location /api/ {
        # Stream in both directions: hand response data to the client as it
        # arrives (SSE / NDJSON), and pass the request body to the gateway
        # without spooling it in nginx first
        proxy_buffering           off;
        proxy_request_buffering   off;
        proxy_cache               off;
        chunked_transfer_encoding on;

        # Upstream target, spoken to over HTTP/1.1
        proxy_http_version 1.1;
        proxy_pass         http://hailo_gateway/api/;

        # Original host and client details for the gateway; the empty
        # Connection header keeps the upstream connection reusable
        proxy_set_header Host              $host;
        proxy_set_header X-Real-IP         $remote_addr;
        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Connection        "";
    }

    # Streaming inference: exact match on /api/generate
    location = /api/generate {
        # Wait up to 600s between reads from the gateway, overriding the
        # server-level read timeout
        proxy_read_timeout 600s;

        # Relay output as it is produced instead of buffering it
        proxy_buffering           off;
        proxy_cache               off;
        chunked_transfer_encoding on;

        # Upstream target, spoken to over HTTP/1.1
        proxy_http_version 1.1;
        proxy_pass         http://hailo_gateway/api/generate;

        # Original host and client details for the gateway; the empty
        # Connection header keeps the upstream connection reusable
        proxy_set_header Host              $host;
        proxy_set_header X-Real-IP         $remote_addr;
        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Connection        "";
    }

    # Streaming inference: exact match on /api/chat
    location = /api/chat {
        # Wait up to 600s between reads from the gateway, overriding the
        # server-level read timeout
        proxy_read_timeout 600s;

        # Relay output as it is produced instead of buffering it
        proxy_buffering           off;
        proxy_cache               off;
        chunked_transfer_encoding on;

        # Upstream target, spoken to over HTTP/1.1
        proxy_http_version 1.1;
        proxy_pass         http://hailo_gateway/api/chat;

        # Original host and client details for the gateway; the empty
        # Connection header keeps the upstream connection reusable
        proxy_set_header Host              $host;
        proxy_set_header X-Real-IP         $remote_addr;
        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Connection        "";
    }

    # Model pull endpoint - can take a while
    location = /api/pull {
        proxy_pass http://hailo_gateway/api/pull;
        proxy_http_version 1.1;

        # Same forwarded headers as the other proxied locations, so the
        # gateway sees the client address and scheme for pulls as well
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Send no Connection header upstream so the keepalive pool of the
        # hailo_gateway upstream can be used
        proxy_set_header Connection '';

        # Relay the response as it arrives rather than buffering it
        proxy_buffering off;
        proxy_cache off;

        # Wait up to one hour between reads from the gateway
        proxy_read_timeout 3600s;  # Model loading can take time
    }

    # Error pages: answer nginx-generated 502/503/504 errors with a short
    # HTML notice. The notice is always sent with status 503.
    error_page 502 503 504 /50x.html;
    location = /50x.html {
        # Reachable only through the error_page redirect above; a client
        # that requests /50x.html directly gets 404 instead of a fabricated
        # outage page
        internal;
        default_type text/html;
        return 503 '<!DOCTYPE html><html><body><h1>Hailo Gateway Unavailable</h1><p>The Hailo Ollama Gateway service is not running.</p></body></html>';
    }
}

# HTTPS server (optional - uncomment and configure if needed)
# server {
#     listen 443 ssl http2;
#     listen [::]:443 ssl http2;
#     server_name localhost hailo.local;
#
#     ssl_certificate /path/to/cert.pem;
#     ssl_certificate_key /path/to/key.pem;
#
#     # Copy location blocks from above...
# }