-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdashboard.py
More file actions
123 lines (110 loc) · 5.48 KB
/
dashboard.py
File metadata and controls
123 lines (110 loc) · 5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from dotenv import load_dotenv
load_dotenv()
import json
import http.server
import socketserver
import os
def _render_row(result: dict) -> str:
    """Render one evaluation result as an HTML ``<tr>`` element.

    Reads ``question``, ``answer``, ``pages``, ``hallucination`` (``verdict``,
    ``max_similarity``) and ``retrieval`` (``quality``, ``avg_relevance``)
    from *result*. Every value is HTML-escaped before interpolation because
    the text comes from the evaluation file rather than from this module.
    """
    from html import escape  # local import keeps the file-level import block untouched

    verdict_colors = {"GROUNDED": "#2a7a2a", "PARTIAL": "#b87a00", "HALLUCINATED": "#cc0000"}
    quality_colors = {"GOOD": "#2a7a2a", "FAIR": "#b87a00", "POOR": "#cc0000"}
    fallback_color = "#666666"  # neutral grey for labels outside the known sets

    hallucination = result["hallucination"]
    retrieval = result["retrieval"]
    verdict = str(hallucination["verdict"])
    quality = str(retrieval["quality"])
    verdict_color = verdict_colors.get(verdict, fallback_color)
    quality_color = quality_colors.get(quality, fallback_color)

    pages = ", ".join(escape(str(page)) for page in result["pages"])

    # Truncate the raw text first, then escape, so an entity is never cut in half.
    answer = result["answer"]
    answer_short = escape(answer[:180])
    if len(answer) > 180:
        answer_short += "..."  # only mark answers that were actually shortened

    cell = "padding:12px;border-bottom:1px solid #eee"
    return f"""
<tr>
<td style="{cell};max-width:200px">{escape(str(result["question"]))}</td>
<td style="{cell};font-size:12px;color:#666">{answer_short}</td>
<td style="{cell};text-align:center">p.{pages}</td>
<td style="{cell};text-align:center">
<span style="color:{verdict_color};font-weight:600">{escape(verdict)}</span>
<br><small style="color:#999">{escape(str(hallucination["max_similarity"]))}</small>
</td>
<td style="{cell};text-align:center">
<span style="color:{quality_color};font-weight:600">{escape(quality)}</span>
<br><small style="color:#999">{escape(str(retrieval["avg_relevance"]))}</small>
</td>
</tr>"""


def generate_dashboard(eval_file: str = "eval_results.json",
                       output_file: str = "dashboard.html") -> str:
    """Build the RAG evaluation dashboard as a standalone HTML page.

    The evaluation JSON must provide the top-level keys ``collection``,
    ``total_questions``, ``grounded``, ``hallucinated``,
    ``avg_hallucination_score``, ``avg_retrieval_score`` and ``results``
    (a list of per-question records; see ``_render_row`` for their keys).

    Args:
        eval_file: Path of the evaluation results JSON file to read.
        output_file: Path the rendered page is written to.

    Returns:
        The rendered HTML document, identical to what was written to disk.

    Raises:
        OSError: If *eval_file* cannot be read or *output_file* written.
        json.JSONDecodeError: If *eval_file* is not valid JSON.
        KeyError: If a required key is missing from the evaluation data.
    """
    from html import escape  # local import keeps the file-level import block untouched

    with open(eval_file, encoding="utf-8") as f:
        data = json.load(f)

    rows = "".join(_render_row(result) for result in data["results"])

    total = data["total_questions"]
    grounded = data["grounded"]
    hallucinated = data["hallucinated"]
    grounding_score = data["avg_hallucination_score"]
    retrieval_score = data["avg_retrieval_score"]

    # An empty evaluation run must not crash the report with a division by zero.
    grounded_pct = round(grounded / total * 100) if total else 0
    grounding_pct = round(grounding_score * 100)
    retrieval_pct = round(retrieval_score * 100)

    hallucinated_color = "#cc0000" if hallucinated > 0 else "#2a7a2a"
    hallucinated_note = "Needs attention" if hallucinated > 0 else "None detected"
    collection = escape(str(data["collection"]))

    page = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>RAG Evaluation Dashboard</title>
<style>
body {{ font-family: system-ui, sans-serif; max-width: 1000px; margin: 0 auto; padding: 2rem; background: #f9f9f9; }}
h1 {{ font-size: 24px; font-weight: 600; margin-bottom: 4px; }}
.subtitle {{ color: #666; margin-bottom: 2rem; }}
.cards {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 2rem; }}
.card {{ background: white; border-radius: 12px; padding: 1.25rem; border: 1px solid #eee; }}
.card-label {{ font-size: 12px; color: #999; margin-bottom: 4px; }}
.card-value {{ font-size: 28px; font-weight: 600; }}
.card-sub {{ font-size: 12px; color: #666; margin-top: 4px; }}
table {{ width: 100%; background: white; border-radius: 12px; border-collapse: collapse; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.05); }}
th {{ padding: 12px; background: #f4f4f4; text-align: left; font-size: 13px; color: #666; font-weight: 500; }}
tr:hover {{ background: #fafafa; }}
.bar {{ height: 8px; border-radius: 4px; background: #eee; margin-top: 6px; }}
.bar-fill {{ height: 100%; border-radius: 4px; background: #0066cc; }}
</style>
</head>
<body>
<h1>RAG Evaluation Dashboard</h1>
<p class="subtitle">Collection: <strong>{collection}</strong> | {total} questions evaluated</p>
<div class="cards">
<div class="card">
<div class="card-label">Grounded answers</div>
<div class="card-value" style="color:#2a7a2a">{grounded}/{total}</div>
<div class="bar"><div class="bar-fill" style="width:{grounded_pct}%;background:#2a7a2a"></div></div>
<div class="card-sub">{grounded_pct}% of answers</div>
</div>
<div class="card">
<div class="card-label">Hallucinated</div>
<div class="card-value" style="color:{hallucinated_color}">{hallucinated}</div>
<div class="card-sub">{hallucinated_note}</div>
</div>
<div class="card">
<div class="card-label">Avg grounding score</div>
<div class="card-value">{grounding_score}</div>
<div class="bar"><div class="bar-fill" style="width:{grounding_pct}%"></div></div>
<div class="card-sub">0.75+ is strong</div>
</div>
<div class="card">
<div class="card-label">Avg retrieval score</div>
<div class="card-value">{retrieval_score}</div>
<div class="bar"><div class="bar-fill" style="width:{retrieval_pct}%"></div></div>
<div class="card-sub">0.4+ is good</div>
</div>
</div>
<table>
<thead>
<tr>
<th>Question</th>
<th>Answer (preview)</th>
<th>Pages</th>
<th>Hallucination</th>
<th>Retrieval</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>
<p style="font-size:12px;color:#bbb;text-align:center;margin-top:2rem">
RAG Doc Intelligence — github.com/arya312/rag-doc-intelligence
</p>
</body>
</html>"""

    # The footer contains a non-ASCII dash, so write UTF-8 explicitly to match
    # the charset declared in the page head.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(page)
    print(f"Dashboard saved to {output_file}")
    return page
if __name__ == "__main__":
    # Script entry point: render dashboard.html into the current working
    # directory, then serve that directory over HTTP on port 8080.
    generate_dashboard()
    print("Serving dashboard at http://localhost:8080")
    print("Open the Ports tab and forward port 8080 to view it in browser")

    # NOTE(review): SimpleHTTPRequestHandler exposes every file in the current
    # directory on all interfaces. This module calls load_dotenv(), so a .env
    # file with secrets is presumably present here; confirm this server is
    # only ever reachable from a trusted machine.
    handler = http.server.SimpleHTTPRequestHandler

    # Must be set before the server is constructed (binding happens in
    # __init__); lets a quick restart rebind the port instead of failing with
    # "Address already in use".
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", 8080), handler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the expected way to stop the server; exit quietly.
            print("\nDashboard server stopped")