Skip to content

Commit 6043d17

Browse files
Merge pull request #4 from samuellimabraz/dev
Dev
2 parents e634392 + f3cc55f commit 6043d17

File tree

11 files changed

+934
-22
lines changed

11 files changed

+934
-22
lines changed

src/demo/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ dependencies = [
3131
"imbalanced-learn>=0.14.0",
3232
"gem-suite>=0.1.6",
3333
"quimb>=1.11.2",
34+
"numba>=0.57.0",
3435
"yfinance>=0.2.66",
3536
"plotly>=6.5.0",
3637
"kaleido>=1.2.0",
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import { NextResponse } from 'next/server';
2+
3+
/**
 * Response shape of RunPod's serverless `/v2/{endpoint_id}/health` endpoint,
 * as consumed by the GET handler below.
 *
 * NOTE(review): field semantics are inferred from the field names and how
 * this route reads them — confirm against RunPod's endpoint-health API docs.
 */
export interface RunPodHealth {
  // Job counters for the endpoint's request queue.
  jobs: {
    completed: number;
    failed: number;
    inProgress: number;
    inQueue: number;
    retried: number;
  };
  // Worker counts grouped by lifecycle state.
  workers: {
    idle: number;
    initializing: number;
    running: number;
    throttled: number;
  };
}
18+
19+
/**
 * Payload returned to the frontend by this status route. Summarizes the raw
 * RunPod health counts into a single coarse `status` plus worker/queue detail.
 */
export interface StatusResponse {
  // Coarse state for UI feedback; see the GET handler for how it is derived.
  status: 'ready' | 'cold_start' | 'initializing' | 'processing' | 'unavailable';
  // Human-readable description of the current state.
  message: string;
  // Worker counts passed through from RunPod health (throttled is omitted).
  workers: {
    idle: number;
    running: number;
    initializing: number;
  };
  // Queue depth passed through from RunPod health.
  queue: {
    inProgress: number;
    inQueue: number;
  };
  estimatedWait?: number; // seconds
}
33+
34+
/**
35+
* Check RunPod endpoint health to provide user feedback during cold starts
36+
*/
37+
export async function GET(): Promise<NextResponse<StatusResponse>> {
38+
const baseUrl = process.env.DEMO_MODEL_URL || 'http://localhost:8000/v1';
39+
const apiKey = process.env.DEMO_API_KEY || '';
40+
41+
// Extract RunPod endpoint URL from the vLLM base URL
42+
// vLLM URL format: https://api.runpod.ai/v2/{endpoint_id}/openai/v1
43+
// Health URL format: https://api.runpod.ai/v2/{endpoint_id}/health
44+
const runpodMatch = baseUrl.match(/https:\/\/api\.runpod\.ai\/v2\/([^/]+)/);
45+
46+
if (!runpodMatch) {
47+
// Not a RunPod endpoint, assume it's always ready (local/other provider)
48+
return NextResponse.json({
49+
status: 'ready',
50+
message: 'Model server ready',
51+
workers: { idle: 1, running: 0, initializing: 0 },
52+
queue: { inProgress: 0, inQueue: 0 },
53+
});
54+
}
55+
56+
const endpointId = runpodMatch[1];
57+
const healthUrl = `https://api.runpod.ai/v2/${endpointId}/health`;
58+
59+
try {
60+
const response = await fetch(healthUrl, {
61+
method: 'GET',
62+
headers: {
63+
'Authorization': `Bearer ${apiKey}`,
64+
'Content-Type': 'application/json',
65+
},
66+
// Short timeout for health check
67+
signal: AbortSignal.timeout(5000),
68+
});
69+
70+
if (!response.ok) {
71+
return NextResponse.json({
72+
status: 'unavailable',
73+
message: 'Unable to check model status',
74+
workers: { idle: 0, running: 0, initializing: 0 },
75+
queue: { inProgress: 0, inQueue: 0 },
76+
});
77+
}
78+
79+
const health: RunPodHealth = await response.json();
80+
81+
const totalWorkers = health.workers.idle + health.workers.running + (health.workers.initializing || 0);
82+
const hasActiveWorkers = totalWorkers > 0;
83+
const hasIdleWorkers = health.workers.idle > 0;
84+
const isInitializing = (health.workers.initializing || 0) > 0;
85+
const hasQueuedJobs = health.jobs.inQueue > 0;
86+
const hasRunningJobs = health.jobs.inProgress > 0;
87+
88+
let status: StatusResponse['status'];
89+
let message: string;
90+
let estimatedWait: number | undefined;
91+
92+
if (hasIdleWorkers) {
93+
status = 'ready';
94+
message = 'Model ready';
95+
} else if (isInitializing) {
96+
status = 'initializing';
97+
message = 'Model loading...';
98+
estimatedWait = 30; // Typical vLLM model load time
99+
} else if (health.workers.running > 0) {
100+
status = 'processing';
101+
message = hasQueuedJobs
102+
? `Processing (${health.jobs.inQueue} in queue)`
103+
: 'Processing request...';
104+
estimatedWait = hasQueuedJobs ? health.jobs.inQueue * 15 : undefined;
105+
} else if (!hasActiveWorkers && (hasQueuedJobs || hasRunningJobs)) {
106+
status = 'cold_start';
107+
message = 'Starting worker...';
108+
estimatedWait = 45; // Cold start + model load
109+
} else if (!hasActiveWorkers) {
110+
status = 'cold_start';
111+
message = 'Workers scaled to zero, will start on request';
112+
estimatedWait = 45;
113+
} else {
114+
status = 'ready';
115+
message = 'Model ready';
116+
}
117+
118+
return NextResponse.json({
119+
status,
120+
message,
121+
workers: {
122+
idle: health.workers.idle,
123+
running: health.workers.running,
124+
initializing: health.workers.initializing || 0,
125+
},
126+
queue: {
127+
inProgress: health.jobs.inProgress,
128+
inQueue: health.jobs.inQueue,
129+
},
130+
estimatedWait,
131+
});
132+
} catch (error) {
133+
console.error('Health check error:', error);
134+
135+
// Network error might indicate cold start
136+
return NextResponse.json({
137+
status: 'cold_start',
138+
message: 'Connecting to model server...',
139+
workers: { idle: 0, running: 0, initializing: 0 },
140+
queue: { inProgress: 0, inQueue: 0 },
141+
estimatedWait: 45,
142+
});
143+
}
144+
}
145+
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import { NextResponse } from 'next/server';
2+
3+
export async function POST(): Promise<NextResponse> {
4+
const baseUrl = process.env.DEMO_MODEL_URL || 'http://localhost:8000/v1';
5+
const apiKey = process.env.DEMO_API_KEY || '';
6+
const modelName = process.env.DEMO_MODEL_NAME || 'default';
7+
8+
console.log('[Warmup] Starting warmup...');
9+
console.log('[Warmup] Base URL:', baseUrl);
10+
11+
const runpodMatch = baseUrl.match(/https:\/\/api\.runpod\.ai\/v2\/([^/]+)/);
12+
13+
if (!runpodMatch) {
14+
console.log('[Warmup] Not a RunPod endpoint, skipping');
15+
return NextResponse.json({
16+
status: 'skipped',
17+
message: 'Not a RunPod endpoint',
18+
});
19+
}
20+
21+
const endpointId = runpodMatch[1];
22+
console.log('[Warmup] Endpoint ID:', endpointId);
23+
24+
try {
25+
const healthUrl = `https://api.runpod.ai/v2/${endpointId}/health`;
26+
let healthData = null;
27+
28+
try {
29+
const healthResponse = await fetch(healthUrl, {
30+
method: 'GET',
31+
headers: {
32+
'Authorization': `Bearer ${apiKey}`,
33+
},
34+
signal: AbortSignal.timeout(5000),
35+
});
36+
37+
if (healthResponse.ok) {
38+
healthData = await healthResponse.json();
39+
console.log('[Warmup] Health:', JSON.stringify(healthData));
40+
41+
if (healthData.workers?.idle > 0) {
42+
console.log('[Warmup] Idle workers available');
43+
return NextResponse.json({
44+
status: 'ready',
45+
message: 'Workers already available',
46+
workers: healthData.workers,
47+
});
48+
}
49+
50+
if (healthData.workers?.initializing > 0) {
51+
console.log('[Warmup] Workers already initializing');
52+
return NextResponse.json({
53+
status: 'warming',
54+
message: 'Workers already starting',
55+
workers: healthData.workers,
56+
});
57+
}
58+
}
59+
} catch (e) {
60+
console.log('[Warmup] Health check error:', e);
61+
}
62+
63+
const openaiUrl = `${baseUrl}/chat/completions`;
64+
console.log('[Warmup] Sending to OpenAI endpoint:', openaiUrl);
65+
66+
const abortController = new AbortController();
67+
const timeoutId = setTimeout(() => abortController.abort(), 5000);
68+
69+
try {
70+
const warmupResponse = await fetch(openaiUrl, {
71+
method: 'POST',
72+
headers: {
73+
'Authorization': `Bearer ${apiKey}`,
74+
'Content-Type': 'application/json',
75+
},
76+
body: JSON.stringify({
77+
model: modelName,
78+
messages: [{ role: 'user', content: 'hi' }],
79+
max_tokens: 1,
80+
stream: false,
81+
}),
82+
signal: abortController.signal,
83+
});
84+
85+
clearTimeout(timeoutId);
86+
87+
console.log('[Warmup] Response status:', warmupResponse.status);
88+
89+
return NextResponse.json({
90+
status: warmupResponse.status === 200 ? 'ready' : 'warming',
91+
message: warmupResponse.status === 200
92+
? 'Model responded (was ready)'
93+
: 'Request queued, worker starting',
94+
httpStatus: warmupResponse.status,
95+
workers: healthData?.workers,
96+
});
97+
98+
} catch (fetchError) {
99+
clearTimeout(timeoutId);
100+
101+
if ((fetchError as Error).name === 'AbortError') {
102+
console.log('[Warmup] Request sent (aborted wait - worker starting)');
103+
return NextResponse.json({
104+
status: 'warming',
105+
message: 'Request sent, worker starting',
106+
workers: healthData?.workers,
107+
});
108+
}
109+
110+
throw fetchError;
111+
}
112+
113+
} catch (error) {
114+
console.error('[Warmup] Error:', error);
115+
return NextResponse.json({
116+
status: 'error',
117+
message: error instanceof Error ? error.message : 'Warmup failed',
118+
}, { status: 500 });
119+
}
120+
}
121+
122+
export async function GET(): Promise<NextResponse> {
123+
const baseUrl = process.env.DEMO_MODEL_URL || 'http://localhost:8000/v1';
124+
const apiKey = process.env.DEMO_API_KEY || '';
125+
126+
const runpodMatch = baseUrl.match(/https:\/\/api\.runpod\.ai\/v2\/([^/]+)/);
127+
128+
if (!runpodMatch) {
129+
return NextResponse.json({
130+
ready: true,
131+
message: 'Not a RunPod endpoint'
132+
});
133+
}
134+
135+
const endpointId = runpodMatch[1];
136+
const healthUrl = `https://api.runpod.ai/v2/${endpointId}/health`;
137+
138+
try {
139+
const response = await fetch(healthUrl, {
140+
method: 'GET',
141+
headers: {
142+
'Authorization': `Bearer ${apiKey}`,
143+
},
144+
signal: AbortSignal.timeout(10000),
145+
});
146+
147+
if (!response.ok) {
148+
console.log('[Warmup GET] Health check failed:', response.status);
149+
return NextResponse.json({ ready: false, message: 'Health check failed' });
150+
}
151+
152+
const health = await response.json();
153+
console.log('[Warmup GET] Health:', JSON.stringify(health));
154+
155+
const idleWorkers = health.workers?.idle || 0;
156+
const readyWorkers = health.workers?.ready || 0;
157+
const runningWorkers = health.workers?.running || 0;
158+
const initializingWorkers = health.workers?.initializing || 0;
159+
const throttledWorkers = health.workers?.throttled || 0;
160+
161+
const isReady = idleWorkers > 0 || readyWorkers > 0;
162+
const isWarming = initializingWorkers > 0;
163+
const isBusy = runningWorkers > 0 && !isReady;
164+
const jobsInQueue = health.jobs?.inQueue || 0;
165+
const jobsInProgress = health.jobs?.inProgress || 0;
166+
167+
return NextResponse.json({
168+
ready: isReady,
169+
warming: isWarming,
170+
busy: isBusy,
171+
jobsInQueue,
172+
jobsInProgress,
173+
workers: {
174+
idle: idleWorkers,
175+
ready: readyWorkers,
176+
running: runningWorkers,
177+
initializing: initializingWorkers,
178+
throttled: throttledWorkers,
179+
},
180+
});
181+
} catch (error) {
182+
const isTimeout = error instanceof Error && error.name === 'TimeoutError';
183+
if (!isTimeout) {
184+
console.error('[Warmup GET] Error:', error);
185+
}
186+
return NextResponse.json({
187+
ready: false,
188+
warming: true,
189+
message: isTimeout ? 'Health check timed out' : 'Check failed'
190+
});
191+
}
192+
}

0 commit comments

Comments
 (0)