|
| 1 | +apiVersion: v1 |
| 2 | +kind: ConfigMap |
| 3 | +metadata: |
| 4 | + name: har-analyzer-function |
| 5 | + namespace: open-webui |
| 6 | +data: |
| 7 | + har-analyzer.py: | |
| 8 | + """ |
| 9 | + title: HAR File Analyzer (Full) |
| 10 | + author: Claude |
| 11 | + version: 2.0.0 |
| 12 | + description: Comprehensive HAR analyzer - WebSockets, WebRTC, caching, security, performance, third-party, and more. |
| 13 | + requirements: |
| 14 | + """ |
| 15 | +
|
| 16 | + import json |
| 17 | + import re |
| 18 | + from typing import Optional, Dict, List, Any |
| 19 | + from pydantic import BaseModel, Field |
| 20 | + from urllib.parse import urlparse |
| 21 | +
|
| 22 | +
|
| 23 | + class Tools: |
| 24 | + class Valves(BaseModel): |
| 25 | + max_slow_requests: int = Field(default=20, description="Maximum slow requests to show") |
| 26 | + max_errors: int = Field(default=30, description="Maximum errors to show") |
| 27 | + max_websocket_messages: int = Field(default=20, description="Maximum WebSocket messages to show") |
| 28 | + slow_threshold_ms: int = Field(default=1000, description="Threshold for 'slow' requests (ms)") |
| 29 | + large_response_kb: int = Field(default=500, description="Threshold for 'large' responses (KB)") |
| 30 | +
|
| 31 | + def __init__(self): |
| 32 | + self.valves = self.Valves() |
| 33 | +
|
| 34 | + def analyze_har(self, har_content: str) -> str: |
| 35 | + """ |
| 36 | + Comprehensive HAR file analysis including WebSockets, WebRTC, caching, security, and performance. |
| 37 | +
|
| 38 | + :param har_content: The raw JSON content of a HAR file |
| 39 | + :return: A detailed structured analysis report |
| 40 | + """ |
| 41 | + try: |
| 42 | + har = json.loads(har_content) |
| 43 | + except json.JSONDecodeError as e: |
| 44 | + return f"Error parsing HAR file: {e}" |
| 45 | +
|
| 46 | + entries = har.get("log", {}).get("entries", []) |
| 47 | + if not entries: |
| 48 | + return "No entries found in HAR file" |
| 49 | +
|
| 50 | + analysis = { |
| 51 | + "total_requests": len(entries), |
| 52 | + "total_time": 0, |
| 53 | + "total_size": 0, |
| 54 | + "errors": [], |
| 55 | + "slow_requests": [], |
| 56 | + "large_responses": [], |
| 57 | + "redirects": [], |
| 58 | + "websockets": [], |
| 59 | + "webrtc": [], |
| 60 | + "domains": {}, |
| 61 | + "status_codes": {}, |
| 62 | + "content_types": {}, |
| 63 | + "methods": {}, |
| 64 | + "caching_issues": [], |
| 65 | + "security_issues": [], |
| 66 | + "third_party": [], |
| 67 | + "cors_issues": [], |
| 68 | + "compression": {"compressed": 0, "uncompressed": 0, "savings": 0}, |
| 69 | + "cookies": {"sent": 0, "received": 0, "insecure": []}, |
| 70 | + "timing_breakdown": {"blocked": 0, "dns": 0, "connect": 0, "ssl": 0, "send": 0, "wait": 0, "receive": 0}, |
| 71 | + "protocols": {}, |
| 72 | + "initiators": {}, |
| 73 | + } |
| 74 | +
|
| 75 | + first_party_domain = "" |
| 76 | + if entries: |
| 77 | + first_url = entries[0].get("request", {}).get("url", "") |
| 78 | + try: |
| 79 | + first_party_domain = urlparse(first_url).netloc |
| 80 | + except: |
| 81 | + pass |
| 82 | +
|
| 83 | + for entry in entries: |
| 84 | + self._analyze_entry(entry, analysis, first_party_domain) |
| 85 | +
|
| 86 | + return self._build_report(analysis, first_party_domain) |
| 87 | +
|
| 88 | + def _analyze_entry(self, entry: Dict, analysis: Dict, first_party_domain: str): |
| 89 | + request = entry.get("request", {}) |
| 90 | + response = entry.get("response", {}) |
| 91 | + timings = entry.get("timings", {}) |
| 92 | +
|
| 93 | + url = request.get("url", "") |
| 94 | + method = request.get("method", "") |
| 95 | + status = response.get("status", 0) |
| 96 | + time_ms = entry.get("time", 0) or 0 |
| 97 | +
|
| 98 | + content = response.get("content", {}) |
| 99 | + response_size = content.get("size", 0) or 0 |
| 100 | +
|
| 101 | + try: |
| 102 | + parsed = urlparse(url) |
| 103 | + domain = parsed.netloc |
| 104 | + path = parsed.path |
| 105 | + scheme = parsed.scheme |
| 106 | + except: |
| 107 | + domain = "unknown" |
| 108 | + path = url |
| 109 | + scheme = "" |
| 110 | +
|
| 111 | + analysis["total_time"] += time_ms |
| 112 | + analysis["total_size"] += response_size |
| 113 | + analysis["methods"][method] = analysis["methods"].get(method, 0) + 1 |
| 114 | + analysis["status_codes"][status] = analysis["status_codes"].get(status, 0) + 1 |
| 115 | +
|
| 116 | + mime_type = content.get("mimeType", "unknown") |
| 117 | + base_mime = mime_type.split(";")[0].strip() |
| 118 | + analysis["content_types"][base_mime] = analysis["content_types"].get(base_mime, 0) + 1 |
| 119 | +
|
| 120 | + http_version = response.get("httpVersion", "unknown") |
| 121 | + analysis["protocols"][http_version] = analysis["protocols"].get(http_version, 0) + 1 |
| 122 | +
|
| 123 | + if domain not in analysis["domains"]: |
| 124 | + analysis["domains"][domain] = { |
| 125 | + "count": 0, "total_time": 0, "total_size": 0, "errors": 0, |
| 126 | + "is_third_party": domain != first_party_domain and first_party_domain != "" |
| 127 | + } |
| 128 | + analysis["domains"][domain]["count"] += 1 |
| 129 | + analysis["domains"][domain]["total_time"] += time_ms |
| 130 | + analysis["domains"][domain]["total_size"] += response_size |
| 131 | +
|
| 132 | + for key in ["blocked", "dns", "connect", "ssl", "send", "wait", "receive"]: |
| 133 | + val = timings.get(key, 0) |
| 134 | + if val and val > 0: |
| 135 | + analysis["timing_breakdown"][key] += val |
| 136 | +
|
| 137 | + req_headers = {h.get("name", "").lower(): h.get("value", "") for h in request.get("headers", [])} |
| 138 | + res_headers = {h.get("name", "").lower(): h.get("value", "") for h in response.get("headers", [])} |
| 139 | +
|
| 140 | + if scheme in ["ws", "wss"] or res_headers.get("upgrade", "").lower() == "websocket": |
| 141 | + ws_entry = {"url": url[:150], "status": status, "messages": []} |
| 142 | + ws_messages = entry.get("_webSocketMessages", []) |
| 143 | + for msg in ws_messages[:self.valves.max_websocket_messages]: |
| 144 | + ws_entry["messages"].append({ |
| 145 | + "type": msg.get("type", ""), |
| 146 | + "time": msg.get("time", ""), |
| 147 | + "data": str(msg.get("data", ""))[:200] |
| 148 | + }) |
| 149 | + analysis["websockets"].append(ws_entry) |
| 150 | +
|
| 151 | + webrtc_patterns = [r"stun:", r"turn:", r"\.twilio\.com", r"\.xirsys\.com", r"webrtc", r"rtc\.", r"\.peerjs\.", r"signaling", r"ice.*candidate", r"sdp", r"peer.*connection"] |
| 152 | + is_webrtc = any(re.search(p, url.lower()) for p in webrtc_patterns) or "application/sdp" in mime_type.lower() |
| 153 | +
|
| 154 | + if is_webrtc: |
| 155 | + rtc_type = "WebRTC" |
| 156 | + if "stun" in url.lower(): rtc_type = "STUN" |
| 157 | + elif "turn" in url.lower(): rtc_type = "TURN" |
| 158 | + elif "signal" in url.lower(): rtc_type = "Signaling" |
| 159 | + elif "sdp" in url.lower(): rtc_type = "SDP" |
| 160 | + analysis["webrtc"].append({"url": url[:150], "type": rtc_type, "method": method, "status": status, "time_ms": round(time_ms, 2)}) |
| 161 | +
|
| 162 | + if status >= 400: |
| 163 | + analysis["domains"][domain]["errors"] += 1 |
| 164 | + analysis["errors"].append({"url": url[:150], "method": method, "status": status, "status_text": response.get("statusText", ""), "time_ms": round(time_ms, 2), "content_type": base_mime}) |
| 165 | +
|
| 166 | + if time_ms > self.valves.slow_threshold_ms: |
| 167 | + analysis["slow_requests"].append({"url": url[:150], "method": method, "time_ms": round(time_ms, 2), "wait_ms": round(timings.get("wait", 0) or 0, 2), "status": status, "size_kb": round(response_size / 1024, 2), "timings": {k: round(v, 2) if v and v > 0 else 0 for k, v in timings.items()}}) |
| 168 | +
|
| 169 | + if response_size > self.valves.large_response_kb * 1024: |
| 170 | + analysis["large_responses"].append({"url": url[:150], "size_kb": round(response_size / 1024, 2), "content_type": base_mime, "compressed": "content-encoding" in res_headers}) |
| 171 | +
|
| 172 | + if 300 <= status < 400: |
| 173 | + analysis["redirects"].append({"from": url[:100], "to": res_headers.get("location", "")[:100], "status": status}) |
| 174 | +
|
| 175 | + if domain != first_party_domain and first_party_domain: |
| 176 | + if not any(tp["domain"] == domain for tp in analysis["third_party"]): |
| 177 | + tp_type = "Other" |
| 178 | + dl = domain.lower() |
| 179 | + if any(x in dl for x in ["analytics", "mixpanel", "segment"]): tp_type = "Analytics" |
| 180 | + elif any(x in dl for x in ["doubleclick", "adsense", "adnxs"]): tp_type = "Advertising" |
| 181 | + elif any(x in dl for x in ["cloudflare", "cdn", "akamai", "fastly"]): tp_type = "CDN" |
| 182 | + elif any(x in dl for x in ["fonts.googleapis", "fonts.gstatic"]): tp_type = "Fonts" |
| 183 | + analysis["third_party"].append({"domain": domain, "type": tp_type}) |
| 184 | +
|
| 185 | + def _build_report(self, analysis: Dict, first_party_domain: str) -> str: |
| 186 | + report = [] |
| 187 | + report.append("# HAR Analysis Report\n") |
| 188 | + report.append("## Summary") |
| 189 | + report.append(f"- **Total Requests:** {analysis['total_requests']}") |
| 190 | + report.append(f"- **Total Load Time:** {round(analysis['total_time'] / 1000, 2)}s") |
| 191 | + report.append(f"- **Total Data:** {self._format_size(analysis['total_size'])}") |
| 192 | + report.append(f"- **First-Party:** {first_party_domain}") |
| 193 | + report.append(f"- **Third-Party Domains:** {len(analysis['third_party'])}") |
| 194 | + report.append(f"- **Errors:** {len(analysis['errors'])}") |
| 195 | + report.append(f"- **WebSockets:** {len(analysis['websockets'])}") |
| 196 | + report.append(f"- **WebRTC:** {len(analysis['webrtc'])}") |
| 197 | + report.append("") |
| 198 | +
|
| 199 | + report.append("## Status Codes") |
| 200 | + for code, count in sorted(analysis["status_codes"].items()): |
| 201 | + pct = round(count / analysis["total_requests"] * 100, 1) |
| 202 | + report.append(f"- **{code}:** {count} ({pct}%)") |
| 203 | + report.append("") |
| 204 | +
|
| 205 | + report.append("## Top Domains") |
| 206 | + report.append("| Domain | Requests | Time | Size | Errors |") |
| 207 | + report.append("|--------|----------|------|------|--------|") |
| 208 | + sorted_domains = sorted(analysis["domains"].items(), key=lambda x: x[1]["total_time"], reverse=True)[:10] |
| 209 | + for domain, stats in sorted_domains: |
| 210 | + report.append(f"| {domain[:30]} | {stats['count']} | {round(stats['total_time']/1000, 2)}s | {self._format_size(stats['total_size'])} | {stats['errors']} |") |
| 211 | + report.append("") |
| 212 | +
|
| 213 | + if analysis["websockets"]: |
| 214 | + report.append("## WebSocket Connections") |
| 215 | + for ws in analysis["websockets"][:5]: |
| 216 | + report.append(f"- **{ws['url']}** (status: {ws['status']}, messages: {len(ws['messages'])})") |
| 217 | + report.append("") |
| 218 | +
|
| 219 | + if analysis["webrtc"]: |
| 220 | + report.append("## WebRTC Activity") |
| 221 | + for rtc in analysis["webrtc"][:10]: |
| 222 | + report.append(f"- **{rtc['type']}** {rtc['url']} ({rtc['time_ms']}ms)") |
| 223 | + report.append("") |
| 224 | +
|
| 225 | + if analysis["slow_requests"]: |
| 226 | + report.append(f"## Slowest Requests (>{self.valves.slow_threshold_ms}ms)") |
| 227 | + sorted_slow = sorted(analysis["slow_requests"], key=lambda x: x["time_ms"], reverse=True)[:15] |
| 228 | + for req in sorted_slow: |
| 229 | + report.append(f"- **{req['time_ms']}ms** {req['method']} {req['url']}") |
| 230 | + report.append("") |
| 231 | +
|
| 232 | + if analysis["errors"]: |
| 233 | + report.append("## Errors") |
| 234 | + for err in analysis["errors"][:20]: |
| 235 | + report.append(f"- **{err['status']}** {err['method']} {err['url']}") |
| 236 | + report.append("") |
| 237 | +
|
| 238 | + if analysis["third_party"]: |
| 239 | + report.append("## Third-Party Services") |
| 240 | + by_type = {} |
| 241 | + for tp in analysis["third_party"]: |
| 242 | + t = tp["type"] |
| 243 | + if t not in by_type: by_type[t] = [] |
| 244 | + by_type[t].append(tp["domain"]) |
| 245 | + for t, domains in sorted(by_type.items()): |
| 246 | + report.append(f"**{t}:** {', '.join(domains[:5])}") |
| 247 | + report.append("") |
| 248 | +
|
| 249 | + return "\n".join(report) |
| 250 | +
|
| 251 | + def _format_size(self, bytes_size: int) -> str: |
| 252 | + if bytes_size < 1024: return f"{bytes_size}B" |
| 253 | + elif bytes_size < 1024 * 1024: return f"{round(bytes_size / 1024, 1)}KB" |
| 254 | + else: return f"{round(bytes_size / (1024 * 1024), 2)}MB" |
| 255 | +--- |
| 256 | +apiVersion: batch/v1 |
| 257 | +kind: Job |
| 258 | +metadata: |
| 259 | + name: load-har-analyzer |
| 260 | + namespace: open-webui |
| 261 | + annotations: |
| 262 | + argocd.argoproj.io/hook: PostSync |
| 263 | + argocd.argoproj.io/hook-delete-policy: HookSucceeded |
| 264 | +spec: |
| 265 | + ttlSecondsAfterFinished: 300 |
| 266 | + template: |
| 267 | + spec: |
| 268 | + restartPolicy: OnFailure |
| 269 | + containers: |
| 270 | + - name: loader |
| 271 | + image: curlimages/curl:latest |
| 272 | + command: |
| 273 | + - /bin/sh |
| 274 | + - -c |
| 275 | + - | |
| 276 | + echo "Waiting for Open-WebUI to be ready..." |
| 277 | + sleep 30 |
| 278 | +
|
| 279 | + # Read the function code |
| 280 | + FUNC_CODE=$(cat /functions/har-analyzer.py | jq -Rs .) |
| 281 | +
|
| 282 | + # Create the function via API (requires admin token) |
| 283 | + # Note: You'll need to set ADMIN_TOKEN after first login |
| 284 | + if [ -n "$ADMIN_TOKEN" ]; then |
| 285 | + curl -X POST "http://open-webui.open-webui.svc.cluster.local:8080/api/v1/functions/create" \ |
| 286 | + -H "Authorization: Bearer $ADMIN_TOKEN" \ |
| 287 | + -H "Content-Type: application/json" \ |
| 288 | + -d "{ |
| 289 | + \"id\": \"har_analyzer\", |
| 290 | + \"name\": \"HAR File Analyzer\", |
| 291 | + \"type\": \"tool\", |
| 292 | + \"content\": $FUNC_CODE, |
| 293 | + \"meta\": { |
| 294 | + \"description\": \"Comprehensive HAR analyzer - WebSockets, WebRTC, caching, security, performance\" |
| 295 | + } |
| 296 | + }" |
| 297 | + echo "Function loaded!" |
| 298 | + else |
| 299 | + echo "ADMIN_TOKEN not set. Load function manually via UI." |
| 300 | + echo "Function code available at /functions/har-analyzer.py" |
| 301 | + fi |
| 302 | + env: |
| 303 | + - name: ADMIN_TOKEN |
| 304 | + valueFrom: |
| 305 | + secretKeyRef: |
| 306 | + name: open-webui-admin-token |
| 307 | + key: token |
| 308 | + optional: true |
| 309 | + volumeMounts: |
| 310 | + - name: functions |
| 311 | + mountPath: /functions |
| 312 | + volumes: |
| 313 | + - name: functions |
| 314 | + configMap: |
| 315 | + name: har-analyzer-function |
0 commit comments