Skip to content

Commit 122bcc8

Browse files
committed
feat: complete Cost Optimizer Phase 2 - core integration
Phase 2 Deliverables (100% Complete): ==================================== 🎯 Complexity Analyzer (290 lines) - src/services/cost-optimizer/complexity-analyzer.ts - Token counting with tiktoken (GPT-4 encoding) - Keyword detection (complex, simple, Chinese language) - Confidence scoring algorithm - Latency estimation - Supports 3-tier routing (free → mid → premium) 🔀 Routing Engine (370 lines) - src/services/cost-optimizer/routing-engine.ts - Provider selection logic with fallback chains - Cost calculation for all providers - Savings calculation vs default (Claude) - Alternative recommendations - Provider health monitoring - Tier-based routing (free/mid/premium) 💎 Provider Clients (530 lines total) 1. Gemini Client (160 lines) - src/services/cost-optimizer/providers/gemini-client.ts - Google Gemini Flash integration (FREE tier) - @google/generative-ai SDK - Handles 70% of simple queries at $0 cost 2. Claude Client (170 lines) - src/services/cost-optimizer/providers/claude-client.ts - Anthropic Claude Haiku integration (MID tier) - @anthropic-ai/sdk - $0.25/$1.25 per 1M tokens (input/output) - Complex reasoning tasks 3. OpenRouter Client (200 lines) - src/services/cost-optimizer/providers/openrouter-client.ts - Multi-model aggregator (40+ models) - OpenAI-compatible API - Fallback routing support 📊 Cost Tracking (320 lines) - src/services/cost-optimizer/database/cost-tracker.ts - Supabase integration for request logging - Real-time budget checking (daily/monthly) - Provider breakdown analytics - Tier breakdown statistics - Savings calculation and reporting 🎛️ Main Orchestration Service (180 lines) - src/services/cost-optimizer/index.ts - Coordinates all components - Budget checking before requests - Complexity analysis → routing → execution → logging - Fallback to default provider on errors - Enable/disable toggle 🌐 API Endpoints (200 lines total) 1. 
POST /api/optimize/complete (80 lines) - Main optimization endpoint - Request validation - Response with cost headers - Budget exceeded handling (429 status) 2. GET /api/optimize/stats (60 lines) - Cost statistics and analytics - Query params: organization, period - Cached responses (60s) - Provider and tier breakdowns 3. POST /api/optimize/recommendation (60 lines) - Routing preview without execution - Cost estimation - Alternative provider suggestions - No-cache responses Phase 2 Metrics: =============== - TypeScript Files Created: 10 - Total Lines of Code: 2,306 - Provider Integrations: 3 (Gemini, Claude, OpenRouter) - API Endpoints: 3 (complete, stats, recommendation) - Database Service: 1 (Supabase cost tracker) - Core Services: 3 (analyzer, router, orchestrator) Cost Optimization Architecture: ============================== Request Flow: 1. Budget check (daily/monthly limits) 2. Complexity analysis (token count + keywords) 3. Provider routing (3-tier: free/mid/premium) 4. API execution (with fallback) 5. Database logging (Supabase) 6. Response with savings metrics Provider Distribution: - Gemini Flash (free): 70% of queries → $0/day - Claude Haiku (mid): 25% of queries → $0.13/day - RunPod Premium: 5% of queries → $0.05/day - Expected Total: $15-20/month (vs $45-50 without optimization) Dependencies Used: ================ - @google/generative-ai: Gemini API integration - @anthropic-ai/sdk: Claude API integration - tiktoken: Token counting (GPT-4 encoding) - @supabase/supabase-js: Database integration - axios: HTTP client for OpenRouter Next: Phase 3 - UI Components, Testing, Documentation
1 parent 6936877 commit 122bcc8

10 files changed

Lines changed: 2306 additions & 0 deletions

File tree

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
2+
* Cost-Optimized Completion API
3+
*
4+
* POST /api/optimize/complete
5+
* Main endpoint for cost-optimized LLM completions
6+
*/
7+
8+
import { NextRequest, NextResponse } from 'next/server'
9+
import { costOptimizer } from '@/services/cost-optimizer'
10+
import type { OptimizationRequest, Organization } from '@/types/cost-optimizer'
11+
12+
// Next.js route segment config: run this route handler on the Node.js runtime.
export const runtime = 'nodejs'
13+
// Next.js route segment config: evaluate this route on every request instead of serving a static result.
export const dynamic = 'force-dynamic'
14+
15+
/**
 * Handles `POST /api/optimize/complete`.
 *
 * Reads the JSON body, requires `prompt` and `organizationId`, fills in
 * defaults for the optional tuning fields and hands the request to
 * `costOptimizer.optimize`. The optimizer result is returned as JSON, with its
 * provider, tier, cost, savings and latency mirrored into `X-*` headers.
 *
 * @param request - Incoming request whose body is a partial `OptimizationRequest`.
 * @returns 200 with the optimizer response; 400 when the body is not a JSON
 *   object or a required field is missing; 429 when the failure message
 *   mentions "budget exceeded"; 500 for any other failure.
 */
export async function POST(request: NextRequest) {
  try {
    // A body that cannot be parsed is the caller's mistake, so answer 400 here
    // instead of letting it fall through to the generic 500 handler below.
    let parsed: unknown
    try {
      parsed = await request.json()
    } catch {
      return NextResponse.json(
        { error: 'Invalid JSON body' },
        { status: 400 }
      )
    }

    // `null` is valid JSON but reading a property from it would throw.
    if (typeof parsed !== 'object' || parsed === null) {
      return NextResponse.json(
        { error: 'Request body must be a JSON object' },
        { status: 400 }
      )
    }

    const body = parsed as Partial<OptimizationRequest>

    // Validate required fields
    if (!body.prompt) {
      return NextResponse.json(
        { error: 'Missing required field: prompt' },
        { status: 400 }
      )
    }

    if (!body.organizationId) {
      return NextResponse.json(
        { error: 'Missing required field: organizationId' },
        { status: 400 }
      )
    }

    // Build optimization request
    const optimizationRequest: OptimizationRequest = {
      prompt: body.prompt,
      organizationId: body.organizationId as Organization,
      userId: body.userId,
      // `||` is kept on purpose: a limit of 0 tokens is unusable, so it also
      // falls back to the default.
      maxTokens: body.maxTokens || 1000,
      // `??` so that an explicit temperature of 0 is honoured rather than
      // being replaced by the default.
      temperature: body.temperature ?? 0.7,
      forceProvider: body.forceProvider,
      forceTier: body.forceTier,
      systemMessage: body.systemMessage,
      conversationHistory: body.conversationHistory,
      stream: body.stream || false,
      metadata: body.metadata
    }

    // Execute optimized completion
    const response = await costOptimizer.optimize(optimizationRequest)

    // Return response with cost information
    return NextResponse.json(response, {
      status: 200,
      headers: {
        'X-Provider': response.provider,
        'X-Tier': response.tier,
        'X-Cost': response.cost.total.toString(),
        'X-Savings': response.savings.toString(),
        'X-Latency': response.latency.toString()
      }
    })
  } catch (error) {
    console.error('Optimization API error:', error)

    const errorMessage = error instanceof Error ? error.message : 'Unknown error'
    // Match case-insensitively so "Budget exceeded" and "budget exceeded"
    // both map to 429.
    const budgetExceeded = errorMessage.toLowerCase().includes('budget exceeded')
    const statusCode = budgetExceeded ? 429 : 500

    return NextResponse.json(
      {
        error: 'Optimization failed',
        message: errorMessage,
        timestamp: new Date().toISOString()
      },
      { status: statusCode }
    )
  }
}
80+
81+
// OPTIONS for CORS
82+
/**
 * Answers CORS preflight requests for this route.
 *
 * @returns An empty JSON object carrying the allowed origin, methods and
 *   request headers.
 */
export async function OPTIONS() {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization'
  }

  return NextResponse.json({}, { headers: corsHeaders })
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
2+
* Routing Recommendation API
3+
*
4+
* POST /api/optimize/recommendation
5+
* Preview routing decision without executing the request
6+
*/
7+
8+
import { NextRequest, NextResponse } from 'next/server'
9+
import { costOptimizer } from '@/services/cost-optimizer'
10+
import type { OptimizationRequest, Organization } from '@/types/cost-optimizer'
11+
12+
// Next.js route segment config: run this route handler on the Node.js runtime.
export const runtime = 'nodejs'
13+
// Next.js route segment config: evaluate this route on every request instead of serving a static result.
export const dynamic = 'force-dynamic'
14+
15+
/**
 * Handles `POST /api/optimize/recommendation`.
 *
 * Builds the same `OptimizationRequest` as the completion endpoint but only
 * asks `costOptimizer.getRecommendation` for the routing decision. The
 * recommendation is returned as JSON with its provider, tier and estimates
 * mirrored into `X-*` headers, and is marked `no-store`.
 *
 * @param request - Incoming request whose body is a partial `OptimizationRequest`.
 * @returns 200 with the recommendation; 400 when the body is not a JSON object
 *   or a required field is missing; 500 for any other failure.
 */
export async function POST(request: NextRequest) {
  try {
    // A body that cannot be parsed is the caller's mistake, so answer 400 here
    // instead of letting it fall through to the generic 500 handler below.
    let parsed: unknown
    try {
      parsed = await request.json()
    } catch {
      return NextResponse.json(
        { error: 'Invalid JSON body' },
        { status: 400 }
      )
    }

    // `null` is valid JSON but reading a property from it would throw.
    if (typeof parsed !== 'object' || parsed === null) {
      return NextResponse.json(
        { error: 'Request body must be a JSON object' },
        { status: 400 }
      )
    }

    const body = parsed as Partial<OptimizationRequest>

    // Validate required fields
    if (!body.prompt) {
      return NextResponse.json(
        { error: 'Missing required field: prompt' },
        { status: 400 }
      )
    }

    if (!body.organizationId) {
      return NextResponse.json(
        { error: 'Missing required field: organizationId' },
        { status: 400 }
      )
    }

    // Build optimization request
    const optimizationRequest: OptimizationRequest = {
      prompt: body.prompt,
      organizationId: body.organizationId as Organization,
      userId: body.userId,
      // `||` is kept on purpose: a limit of 0 tokens is unusable, so it also
      // falls back to the default.
      maxTokens: body.maxTokens || 1000,
      // `??` so that an explicit temperature of 0 is honoured rather than
      // being replaced by the default.
      temperature: body.temperature ?? 0.7,
      forceProvider: body.forceProvider,
      forceTier: body.forceTier,
      systemMessage: body.systemMessage,
      conversationHistory: body.conversationHistory,
      stream: body.stream || false,
      metadata: body.metadata
    }

    // Get recommendation (no execution)
    // NOTE(review): called without `await`; confirm getRecommendation is synchronous.
    const recommendation = costOptimizer.getRecommendation(optimizationRequest)

    // Return recommendation
    return NextResponse.json(recommendation, {
      status: 200,
      headers: {
        'X-Provider': recommendation.provider,
        'X-Tier': recommendation.tier,
        'X-Estimated-Cost': recommendation.estimatedCost.toString(),
        'X-Estimated-Savings': recommendation.estimatedSavings.toString(),
        'X-Estimated-Latency': recommendation.estimatedLatency.toString(),
        'Cache-Control': 'no-store' // Don't cache recommendations
      }
    })
  } catch (error) {
    console.error('Recommendation API error:', error)

    return NextResponse.json(
      {
        error: 'Failed to get recommendation',
        message: error instanceof Error ? error.message : 'Unknown error',
        timestamp: new Date().toISOString()
      },
      { status: 500 }
    )
  }
}
78+
79+
// OPTIONS for CORS
80+
/**
 * Answers CORS preflight requests for this route.
 *
 * @returns An empty JSON object carrying the allowed origin, methods and
 *   request headers.
 */
export async function OPTIONS() {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization'
  }

  return NextResponse.json({}, { headers: corsHeaders })
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/**
2+
* Cost Statistics API
3+
*
4+
* GET /api/optimize/stats?organization=<org>&period=<period>
5+
* Returns cost analytics and statistics
6+
*/
7+
8+
import { NextRequest, NextResponse } from 'next/server'
9+
import { costOptimizer } from '@/services/cost-optimizer'
10+
import type { Organization } from '@/types/cost-optimizer'
11+
12+
// Next.js route segment config: run this route handler on the Node.js runtime.
export const runtime = 'nodejs'
13+
// Next.js route segment config: evaluate this route on every request instead of serving a static result.
export const dynamic = 'force-dynamic'
14+
15+
/**
 * Handles `GET /api/optimize/stats?organization=<org>&period=<period>`.
 *
 * Checks the `organization` and `period` query parameters against the accepted
 * values (`period` falls back to `daily` when absent or empty), then returns
 * whatever `costOptimizer.getStats` yields for that pair.
 *
 * @param request - Incoming request carrying the query parameters.
 * @returns 200 with the statistics and a 60-second cache header; 400 for a
 *   missing or unrecognised `organization` or an unrecognised `period`; 500
 *   when no statistics come back or the lookup throws.
 */
export async function GET(request: NextRequest) {
  // Every validation failure below shares this response shape.
  const badRequest = (error: string) =>
    NextResponse.json({ error }, { status: 400 })

  try {
    const query = request.nextUrl.searchParams
    const organization = query.get('organization') as Organization
    const period = (query.get('period') || 'daily') as
      | 'hourly'
      | 'daily'
      | 'weekly'
      | 'monthly'

    // Organization must be present and one of the known tenants.
    const allowedOrganizations = ['swaggystacks', 'scientia-capital']
    const organizationAccepted =
      Boolean(organization) && allowedOrganizations.includes(organization)
    if (!organizationAccepted) {
      return badRequest('Invalid or missing organization parameter')
    }

    // Period must be one of the supported aggregation windows.
    const allowedPeriods = ['hourly', 'daily', 'weekly', 'monthly']
    if (!allowedPeriods.includes(period)) {
      return badRequest('Invalid period. Must be: hourly, daily, weekly, or monthly')
    }

    const stats = await costOptimizer.getStats(organization, period)

    if (stats) {
      return NextResponse.json(stats, {
        status: 200,
        headers: {
          'Cache-Control': 'public, max-age=60', // Cache for 1 minute
          'X-Organization': organization,
          'X-Period': period
        }
      })
    }

    // A falsy result from the optimizer is reported as a server-side failure.
    return NextResponse.json(
      { error: 'Failed to retrieve statistics' },
      { status: 500 }
    )
  } catch (error) {
    console.error('Stats API error:', error)

    const message = error instanceof Error ? error.message : 'Unknown error'
    const payload = {
      error: 'Failed to retrieve statistics',
      message,
      timestamp: new Date().toISOString()
    }

    return NextResponse.json(payload, { status: 500 })
  }
}
70+
71+
// OPTIONS for CORS
72+
/**
 * Answers CORS preflight requests for this route.
 *
 * @returns An empty JSON object carrying the allowed origin, methods and
 *   request headers.
 */
export async function OPTIONS() {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization'
  }

  return NextResponse.json({}, { headers: corsHeaders })
}

0 commit comments

Comments
 (0)