Skip to content

Commit 45e8bad

Browse files
authored
feat: add LLM project price evaluation
Merged by MergeOS maintainer after CI and code review. Linked bounty issue remains open for tracking and payout review.
1 parent 960c62b commit 45e8bad

3 files changed

Lines changed: 306 additions & 6 deletions

File tree

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package core
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"math"
8+
"strings"
9+
)
10+
11+
// LLMPriceEvaluationRequest carries the project fields that are rendered into
// the LLM price-evaluation prompt and fed to the rule-based fallback.
// It is decoded directly from the JSON request body.
type LLMPriceEvaluationRequest struct {
	// Title is the project title; omitted from the prompt when blank.
	Title string `json:"title"`
	// Description is the free-form project description.
	Description string `json:"description"`
	// Requirements lists individual requirements; rendered as a list section.
	Requirements []string `json:"requirements"`
	// Deliverables lists expected deliverables; each one raises the fallback estimate.
	Deliverables []string `json:"deliverables"`
	// Timeline is free-form text; the fallback looks for words such as "urgent" or "asap".
	Timeline string `json:"timeline"`
	// TechStack is free-form text naming the technologies involved.
	TechStack string `json:"tech_stack"`
	// Complexity is a free-form label; the fallback recognises e.g. "high", "medium", "low".
	Complexity string `json:"complexity"`
	// Constraints is free-form text describing stated constraints.
	Constraints string `json:"constraints"`
	// ReferenceBudget is the client's budget in USD; values <= 0 mean "not provided".
	ReferenceBudget int64 `json:"reference_budget"`
}
23+
24+
// LLMPriceEvaluationResponse is the structured price estimate returned to the
// client, whether it came from the LLM or from the rule-based fallback.
type LLMPriceEvaluationResponse struct {
	// SuggestedLow is the lower bound of the estimate in whole USD.
	SuggestedLow int64 `json:"suggested_low"`
	// SuggestedHigh is the upper bound of the estimate in whole USD.
	SuggestedHigh int64 `json:"suggested_high"`
	// ConfidenceLevel is in the range (0, 1]; out-of-range model values are replaced.
	ConfidenceLevel float64 `json:"confidence_level"`
	// TaskBreakdown maps a category name to its share of the estimate in USD.
	TaskBreakdown map[string]int64 `json:"task_breakdown"`
	// Assumptions lists the assumptions behind the estimate.
	Assumptions []string `json:"assumptions"`
	// Risks lists risk factors that could move the price.
	Risks []string `json:"risks"`
	// Rationale is a short explanation of the estimate.
	Rationale string `json:"rationale"`
	// Editable is set to true by the server on every response it produces.
	Editable bool `json:"editable"`
}
35+
36+
// llmPriceEvalMaxTokens is presumably the output-token budget for the LLM
// price-evaluation call.
// NOTE(review): nothing visible in this file references it — confirm it is
// wired into the generate call, or remove it.
const llmPriceEvalMaxTokens = 1600
37+
38+
func buildLLMPriceEvaluationPrompt(req LLMPriceEvaluationRequest) string {
39+
var b strings.Builder
40+
b.WriteString(`You are an expert software project estimator at MergeOS. Analyze the project details below and provide a structured price evaluation.
41+
42+
Respond ONLY with a JSON object (no markdown fences, no extra text) using this exact schema:
43+
{
44+
"suggested_low": number,
45+
"suggested_high": number,
46+
"confidence_level": number,
47+
"task_breakdown": { "category_name": amount_in_usd },
48+
"assumptions": ["string", ...],
49+
"risks": ["string", ...],
50+
"rationale": "string"
51+
}
52+
53+
Rules:
54+
- suggested_low and suggested_high are in USD (whole dollars, no cents).
55+
- suggested_low must be <= suggested_high.
56+
- confidence_level is 0.0 to 1.0 (low detail = lower confidence).
57+
- task_breakdown: 3-6 categories covering the scope, each in USD summing roughly to the midpoint.
58+
- assumptions: 2-4 items based on project details provided.
59+
- risks: 2-3 items highlighting real risk factors.
60+
- rationale: 2-3 sentences explaining the estimate.
61+
- Consider tech stack complexity, number of deliverables, timeline pressure, and stated constraints.
62+
`)
63+
appendEvalField(&b, "Title", req.Title)
64+
appendEvalField(&b, "Description", req.Description)
65+
if len(req.Requirements) > 0 {
66+
appendEvalField(&b, "Requirements", strings.Join(req.Requirements, "\n- "))
67+
}
68+
if len(req.Deliverables) > 0 {
69+
appendEvalField(&b, "Deliverables", strings.Join(req.Deliverables, "\n- "))
70+
}
71+
appendEvalField(&b, "Timeline", req.Timeline)
72+
appendEvalField(&b, "Tech Stack", req.TechStack)
73+
appendEvalField(&b, "Complexity", req.Complexity)
74+
appendEvalField(&b, "Constraints", req.Constraints)
75+
if req.ReferenceBudget > 0 {
76+
appendEvalField(&b, "Reference Budget (USD)", fmt.Sprintf("%d", req.ReferenceBudget))
77+
}
78+
b.WriteString("\nReturn ONLY the JSON object. No markdown, no explanation.\n")
79+
return b.String()
80+
}
81+
82+
// appendEvalField writes a "## <name>" section containing value to b.
//
// The value is whitespace-trimmed first. A blank value writes nothing, so the
// prompt never contains an empty section. Each section is preceded by a blank
// line and terminated by a newline.
func appendEvalField(b *strings.Builder, name, value string) {
	value = strings.TrimSpace(value)
	if value == "" {
		return
	}
	b.WriteString("\n## ")
	b.WriteString(name)
	b.WriteString("\n")
	b.WriteString(value)
	b.WriteString("\n")
}
93+
94+
// EvaluateProjectLLM performs the LLM evaluation and returns a structured response.
95+
// Falls back to a rule-based estimate if the LLM is unavailable.
96+
func (s *Server) EvaluateProjectLLM(ctx context.Context, req LLMPriceEvaluationRequest) (*LLMPriceEvaluationResponse, error) {
97+
if s.geminiReviewer == nil || !s.geminiReviewer.Ready() {
98+
return fallbackPriceEvaluation(req), nil
99+
}
100+
prompt := buildLLMPriceEvaluationPrompt(req)
101+
raw, _, _, err := s.geminiReviewer.generate(ctx, prompt)
102+
if err != nil {
103+
return nil, fmt.Errorf("LLM evaluation failed: %w", err)
104+
}
105+
resp, err := parseLLMPriceEvaluation(raw)
106+
if err != nil {
107+
return fallbackPriceEvaluation(req), nil
108+
}
109+
resp.Editable = true
110+
return resp, nil
111+
}
112+
113+
// parseLLMPriceEvaluation parses the LLM JSON response into a structured result.
114+
func parseLLMPriceEvaluation(raw string) (*LLMPriceEvaluationResponse, error) {
115+
raw = stripMarkdownFence(raw)
116+
var resp LLMPriceEvaluationResponse
117+
if err := json.Unmarshal([]byte(raw), &resp); err != nil {
118+
return nil, fmt.Errorf("failed to parse LLM response: %w", err)
119+
}
120+
// Validate and clamp
121+
if resp.SuggestedLow <= 0 {
122+
resp.SuggestedLow = 200
123+
}
124+
if resp.SuggestedHigh <= 0 || resp.SuggestedHigh < resp.SuggestedLow {
125+
resp.SuggestedHigh = resp.SuggestedLow * 2
126+
}
127+
if resp.ConfidenceLevel <= 0 || resp.ConfidenceLevel > 1 {
128+
resp.ConfidenceLevel = 0.7
129+
}
130+
if len(resp.TaskBreakdown) == 0 {
131+
mid := (resp.SuggestedLow + resp.SuggestedHigh) / 2
132+
resp.TaskBreakdown = map[string]int64{
133+
"Core Development": int64(math.Round(float64(mid) * 0.50 / 50) * 50),
134+
"Integration & QA": int64(math.Round(float64(mid) * 0.30 / 50) * 50),
135+
"Project Management": int64(math.Round(float64(mid) * 0.20 / 50) * 50),
136+
}
137+
}
138+
if len(resp.Assumptions) == 0 {
139+
resp.Assumptions = []string{"Standard development lifecycle with testing and deployment."}
140+
}
141+
if len(resp.Risks) == 0 {
142+
resp.Risks = []string{"Scope changes during development may affect pricing."}
143+
}
144+
if resp.Rationale == "" {
145+
resp.Rationale = "Estimate based on project scope, deliverables, and tech stack."
146+
}
147+
return &resp, nil
148+
}
149+
150+
// stripMarkdownFence removes a surrounding Markdown code fence ("```json ... ```"
// or "``` ... ```") from text and returns the trimmed contents.
//
// The whole first line (fence plus any language tag) is dropped, and everything
// from the last "```" onwards is cut. Text that does not start with a fence, or
// a fence with no newline after it, is returned whitespace-trimmed but
// otherwise unchanged. A missing closing fence is tolerated.
func stripMarkdownFence(text string) string {
	text = strings.TrimSpace(text)
	if !strings.HasPrefix(text, "```") {
		return text
	}
	_, body, found := strings.Cut(text, "\n")
	if !found {
		return text
	}
	if end := strings.LastIndex(body, "```"); end >= 0 {
		body = body[:end]
	}
	return strings.TrimSpace(body)
}
165+
166+
// fallbackPriceEvaluation produces a rule-based estimate when the LLM is unavailable.
167+
func fallbackPriceEvaluation(req LLMPriceEvaluationRequest) *LLMPriceEvaluationResponse {
168+
base := 1000.0
169+
170+
// Tech stack adjustments
171+
tech := strings.ToLower(req.TechStack)
172+
if strings.Contains(tech, "react") || strings.Contains(tech, "vue") || strings.Contains(tech, "next") || strings.Contains(tech, "angular") {
173+
base += 300
174+
}
175+
if strings.Contains(tech, "go") || strings.Contains(tech, "rust") || strings.Contains(tech, "fastapi") || strings.Contains(tech, "python") {
176+
base += 400
177+
}
178+
if strings.Contains(tech, "ai") || strings.Contains(tech, "llm") || strings.Contains(tech, "machine learning") || strings.Contains(tech, "ml") {
179+
base += 800
180+
}
181+
if strings.Contains(tech, "kubernetes") || strings.Contains(tech, "docker") || strings.Contains(tech, "devops") || strings.Contains(tech, "aws") || strings.Contains(tech, "gcp") {
182+
base += 500
183+
}
184+
if strings.Contains(tech, "solidity") || strings.Contains(tech, "web3") || strings.Contains(tech, "ethereum") || strings.Contains(tech, "solana") {
185+
base += 700
186+
}
187+
188+
// Deliverables add cost
189+
deliverableCount := len(req.Deliverables)
190+
base += float64(deliverableCount * 200)
191+
192+
// Requirement detail
193+
reqCount := len(req.Requirements)
194+
if reqCount > 3 {
195+
base += float64(reqCount) * 100
196+
}
197+
198+
// Complexity multiplier
199+
complexity := strings.ToLower(req.Complexity)
200+
switch complexity {
201+
case "high", "advanced", "complex", "very high", "critical":
202+
base *= 1.6
203+
case "medium", "moderate", "intermediate":
204+
base *= 1.2
205+
case "low", "simple", "easy":
206+
base *= 0.8
207+
}
208+
209+
// Timeline pressure
210+
timeline := strings.ToLower(req.Timeline)
211+
if strings.Contains(timeline, "urgent") || strings.Contains(timeline, "asap") || strings.Contains(timeline, "yesterday") {
212+
base *= 1.3
213+
}
214+
215+
// Constraints add overhead
216+
if req.Constraints != "" {
217+
base += 300
218+
}
219+
220+
// Blend with reference budget if provided
221+
if req.ReferenceBudget > 0 {
222+
base = base*0.7 + float64(req.ReferenceBudget)*0.3
223+
}
224+
225+
if base < 150 {
226+
base = 150
227+
}
228+
229+
low := int64(math.Round(base*0.85/50) * 50)
230+
high := int64(math.Round(base*1.25/50) * 50)
231+
mid := int64(math.Round(base / 50) * 50)
232+
233+
breakdown := map[string]int64{
234+
"Core Features & Logic": int64(math.Round(float64(mid)*0.50/50) * 50),
235+
"Frontend Integration": int64(math.Round(float64(mid)*0.25/50) * 50),
236+
"Testing & CI/CD": int64(math.Round(float64(mid)*0.15/50) * 50),
237+
"Project Management": int64(math.Round(float64(mid)*0.10/50) * 50),
238+
}
239+
240+
assumptions := []string{
241+
"Estimate assumes well-defined interfaces and clean design documents.",
242+
"Development follows standard lifecycle with code review and automated testing.",
243+
}
244+
if deliverableCount > 0 {
245+
assumptions = append(assumptions, fmt.Sprintf("All %d deliverables are independently testable.", deliverableCount))
246+
}
247+
248+
risks := []string{
249+
"Scope creep from ambiguous or changing deliverables.",
250+
}
251+
if strings.Contains(tech, "ai") || strings.Contains(tech, "llm") || strings.Contains(tech, "ml") {
252+
risks = append(risks, "AI model non-determinism and API latency/rate limits.")
253+
}
254+
if strings.Contains(timeline, "urgent") || strings.Contains(timeline, "asap") {
255+
risks = append(risks, "Urgent timelines may require parallel workstreams and higher coordination overhead.")
256+
}
257+
258+
rationale := fmt.Sprintf(
259+
"Tech stack (%s) and %d deliverables drive the estimate. Complexity %q with %s timeline.",
260+
req.TechStack, deliverableCount, req.Complexity, req.Timeline,
261+
)
262+
263+
return &LLMPriceEvaluationResponse{
264+
SuggestedLow: low,
265+
SuggestedHigh: high,
266+
ConfidenceLevel: 0.75,
267+
TaskBreakdown: breakdown,
268+
Assumptions: assumptions,
269+
Risks: risks,
270+
Rationale: rationale,
271+
Editable: true,
272+
}
273+
}

backend/internal/core/server.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ func (s *Server) Routes() http.Handler {
7272
mux.HandleFunc("POST /api/projects", s.createProject)
7373
mux.HandleFunc("POST /api/projects/evaluate", s.evaluateProject)
7474
mux.HandleFunc("POST /api/projects/evaluate-price", s.evaluateProjectPrice)
75+
mux.HandleFunc("POST /api/projects/evaluate-llm", s.evaluateProjectWithLLM)
7576
mux.HandleFunc("GET /api/tasks", s.tasks)
7677
mux.HandleFunc("POST /api/tasks/", s.acceptTask)
7778
mux.HandleFunc("GET /api/notifications", s.notifications)
@@ -602,6 +603,23 @@ func (s *Server) evaluateProjectPrice(w http.ResponseWriter, r *http.Request) {
602603
writeJSON(w, http.StatusOK, result)
603604
}
604605

606+
func (s *Server) evaluateProjectWithLLM(w http.ResponseWriter, r *http.Request) {
607+
if _, ok := s.requireUser(w, r); !ok {
608+
return
609+
}
610+
var req LLMPriceEvaluationRequest
611+
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
612+
writeError(w, http.StatusBadRequest, "invalid JSON body")
613+
return
614+
}
615+
result, err := s.EvaluateProjectLLM(r.Context(), req)
616+
if err != nil {
617+
writeError(w, http.StatusBadGateway, err.Error())
618+
return
619+
}
620+
writeJSON(w, http.StatusOK, result)
621+
}
622+
605623
func (s *Server) createProject(w http.ResponseWriter, r *http.Request) {
606624
user, ok := s.requireUser(w, r)
607625
if !ok {

frontend/src/App.vue

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4085,12 +4085,21 @@ async function triggerAiEvaluation() {
40854085
reference_budget: Math.round(usdFromMRG(projectSetupForm.budgetAmount))
40864086
};
40874087
4088-
const response = await api('/api/projects/evaluate', {
4089-
method: 'POST',
4090-
body: JSON.stringify(payload)
4091-
});
4092-
4093-
aiEvaluationResult.value = response;
4088+
// Try LLM-powered evaluation first, with fallback to rule-based
4089+
try {
4090+
const response = await api('/api/projects/evaluate-llm', {
4091+
method: 'POST',
4092+
body: JSON.stringify(payload)
4093+
});
4094+
aiEvaluationResult.value = response;
4095+
} catch (_llmErr) {
4096+
// LLM endpoint unavailable — use rule-based evaluation
4097+
const response = await api('/api/projects/evaluate', {
4098+
method: 'POST',
4099+
body: JSON.stringify(payload)
4100+
});
4101+
aiEvaluationResult.value = response;
4102+
}
40944103
} catch (err) {
40954104
console.error('AI evaluation failed:', err);
40964105
aiEvaluationError.value = err.message || 'AI evaluation failed. Please try again.';

0 commit comments

Comments
 (0)