|
| 1 | +using System.ComponentModel; |
| 2 | +using System.Text.Json; |
| 3 | +using Microsoft.Extensions.AI; |
| 4 | + |
| 5 | +using AzureFinOps.Dashboard.Auth; |
| 6 | +using AzureFinOps.Dashboard.Infrastructure; |
| 7 | + |
| 8 | +namespace AzureFinOps.Dashboard.AI.Tools; |
| 9 | + |
/// <summary>
/// Cost anomaly detection tool. Fetches daily totals from the Azure Cost Management
/// query API, derives a baseline (mean / population standard deviation) from the older
/// part of the window and flags days in the most recent part whose z-score magnitude
/// reaches the requested threshold. The result is returned as indented JSON for the LLM
/// to summarize.
/// </summary>
public class AnomalyTools
{
    /// <summary>Baselines with a standard deviation below this are treated as flat (z-scores are meaningless).</summary>
    private const double MinStdDev = 0.01;

    /// <summary>Maximum number of contributors reported per anomalous day.</summary>
    private const int MaxContributors = 5;

    /// <summary>Upper bound on the number of days used as the detection window.</summary>
    private const int MaxDetectionDays = 7;

    /// <summary>Serializer options are cached: creating a new instance per call discards the serializer's metadata cache.</summary>
    private static readonly JsonSerializerOptions s_indentedJson = new() { WriteIndented = true };

    private readonly UserTokens _tokens;

    /// <summary>Creates the tool set bound to the caller's tokens.</summary>
    /// <param name="tokens">Token holder; <c>AzureToken</c> is read on every invocation.</param>
    public AnomalyTools(UserTokens tokens) => _tokens = tokens;

    /// <summary>Exposes the anomaly detection function to the AI function-calling layer.</summary>
    /// <returns>A lazily produced sequence containing the single <c>DetectCostAnomalies</c> function.</returns>
    public IEnumerable<AIFunction> Create()
    {
        // The description is what the model reads, so it must describe the JSON actually produced below.
        yield return AIFunctionFactory.Create(DetectCostAnomalies, "DetectCostAnomalies",
            @"Detects cost anomalies (spikes/drops) in a subscription's recent daily spend using statistical baselining (z-score over rolling window).

Use when the user asks about:
- 'Why did costs spike?'
- 'Are there any cost anomalies?'
- 'Did anything unusual happen in our spending last week?'
- 'Investigate cost increase'

Returns JSON with:
- baseline: mean, stddev, z_threshold, upper_threshold (mean + z*stddev), lower_threshold (mean - z*stddev, floored at 0)
- anomalies[]: days in the detection window whose |z-score| >= z_threshold, with cost, z_score, deviation_pct, direction (spike/drop) and top_contributors
- recent_daily_costs[]: daily totals for the detection window
- summary: human-readable explanation

After calling, drill into anomalous dates with QueryAzure (Cost Management /query grouped by ResourceGroup or ServiceName for the specific date range) to find the root cause.");
    }

    /// <summary>
    /// Runs the anomaly analysis for one subscription.
    /// </summary>
    /// <param name="subscriptionId">Subscription GUID; anything that is not a GUID is rejected.</param>
    /// <param name="days">History length in days, clamped to 14..90.</param>
    /// <param name="zThreshold">Z-score magnitude that marks a day as anomalous, clamped to 1.0..5.0.</param>
    /// <param name="groupBy">Cost Management dimension used for the per-day breakdown; blank falls back to ServiceName.</param>
    /// <returns>Indented JSON on success, or a plain-text error message when a precondition or the HTTP call fails.</returns>
    private async Task<string> DetectCostAnomalies(
        [Description("Subscription ID to analyze")] string subscriptionId,
        [Description("Days of history to fetch (baseline + detection window). Default 35.")] int days = 35,
        [Description("Z-score threshold for flagging an anomaly. Default 2.0 (= ~95% confidence). Use 1.5 for more sensitive, 3.0 for stricter.")] double zThreshold = 2.0,
        [Description("Optional grouping for breakdown of anomalous days: 'ServiceName', 'ResourceGroup', 'MeterCategory'. Default 'ServiceName'.")] string groupBy = "ServiceName")
    {
        var token = _tokens.AzureToken;
        if (string.IsNullOrEmpty(token))
            return HttpHelper.TokenMissing("AzureToken", null, "anomaly");

        if (string.IsNullOrWhiteSpace(subscriptionId))
            return "Error: subscriptionId is required.";

        // The value comes from model-generated tool arguments and ends up in a URL path that is
        // called with the user's token, so only a well-formed GUID is allowed through.
        if (!Guid.TryParse(subscriptionId.Trim(), out var subscriptionGuid))
            return "Error: subscriptionId must be a subscription GUID (e.g. 00000000-0000-0000-0000-000000000000).";
        subscriptionId = subscriptionGuid.ToString("D");

        days = Math.Clamp(days, 14, 90);
        zThreshold = Math.Clamp(zThreshold, 1.0, 5.0);
        if (string.IsNullOrWhiteSpace(groupBy)) groupBy = "ServiceName";

        // End at yesterday (UTC): the current day's cost is still accruing, and including a
        // partial day in the detection window makes it look like a drop.
        var windowEnd = DateTime.UtcNow.Date.AddDays(-1);
        var windowStart = windowEnd.AddDays(-days);

        // Cost Management daily query (no grouping: total daily cost for the baseline).
        var dailyBody = JsonSerializer.Serialize(new
        {
            type = "ActualCost",
            timeframe = "Custom",
            timePeriod = new { from = FormatDate(windowStart), to = FormatDate(windowEnd) },
            dataset = new
            {
                granularity = "Daily",
                aggregation = new { totalCost = new { name = "Cost", function = "Sum" } }
            }
        });

        using var activity = HttpHelper.Telemetry.StartActivity("DetectCostAnomalies");
        activity?.SetTag("anomaly.subscription", subscriptionId);
        activity?.SetTag("anomaly.days", days);
        activity?.SetTag("anomaly.z_threshold", zThreshold);

        var dailyResp = await HttpHelper.SendWithRetryAsync(
            BuildQueryUrl(subscriptionId), token, activity, "anomaly.daily",
            method: HttpMethod.Post, jsonBody: dailyBody);

        if (!dailyResp.StartsWith("HTTP 200", StringComparison.Ordinal))
            return $"Error fetching daily costs:\n{Truncate(dailyResp, 1500)}";

        var dailyJson = ExtractJsonBody(dailyResp);

        var (series, parseErr) = ParseDailyCosts(dailyJson);
        if (parseErr is not null) return $"Error parsing cost response: {parseErr}\nRaw: {Truncate(dailyJson, 800)}";
        if (series.Count < 7) return $"Not enough data to baseline (got {series.Count} days, need >=7). Try a wider 'days' window.";

        // Split the ordered series: the trailing days (at most 7, at most a third of the data)
        // are the detection window, everything before them is the baseline.
        var detectionDays = Math.Min(MaxDetectionDays, series.Count / 3);
        var baseline = series.Take(series.Count - detectionDays).ToList();
        var detection = series.Skip(series.Count - detectionDays).ToList();

        var mean = baseline.Average(p => p.Cost);
        var variance = baseline.Sum(p => Math.Pow(p.Cost - mean, 2)) / baseline.Count;
        var stddev = Math.Sqrt(variance);
        var threshold = mean + zThreshold * stddev;
        var lowThreshold = Math.Max(0, mean - zThreshold * stddev);

        // A (near-)constant baseline gives a zero denominator for the z-score, so detection is
        // skipped entirely and the summary says so instead of silently reporting "no anomalies".
        var baselineIsFlat = stddev < MinStdDev;

        var anomalies = new List<object>();
        int spikes = 0, drops = 0;
        if (!baselineIsFlat)
        {
            foreach (var p in detection)
            {
                var z = (p.Cost - mean) / stddev;
                if (Math.Abs(z) < zThreshold) continue;

                if (z > 0) spikes++; else drops++;

                // Breakdowns are fetched one at a time on purpose: each is a separate Cost Management call.
                var breakdown = await GetBreakdownForDay(token, subscriptionId, p.Date, groupBy, activity);
                anomalies.Add(new
                {
                    date = FormatDate(p.Date),
                    cost = Math.Round(p.Cost, 2),
                    z_score = Math.Round(z, 2),
                    deviation_pct = mean > 0.01 ? Math.Round((p.Cost - mean) / mean * 100, 1) : 0,
                    direction = z > 0 ? "spike" : "drop",
                    top_contributors = breakdown
                });
            }
        }

        var result = new
        {
            subscription_id = subscriptionId,
            window = new { from = FormatDate(windowStart), to = FormatDate(windowEnd), baseline_days = baseline.Count, detection_days = detection.Count },
            baseline = new
            {
                mean = Math.Round(mean, 2),
                stddev = Math.Round(stddev, 2),
                z_threshold = zThreshold,
                upper_threshold = Math.Round(threshold, 2),
                lower_threshold = Math.Round(lowThreshold, 2),
            },
            anomalies_found = anomalies.Count,
            anomalies,
            recent_daily_costs = detection.Select(p => new { date = FormatDate(p.Date), cost = Math.Round(p.Cost, 2) }),
            summary = BuildSummary(baselineIsFlat, spikes, drops, detection.Count, mean, stddev, zThreshold),
        };

        return JsonSerializer.Serialize(result, s_indentedJson);
    }

    /// <summary>One day of total cost.</summary>
    private record DailyPoint(DateTime Date, double Cost);

    /// <summary>
    /// Extracts (date, cost) pairs from a Cost Management query response body.
    /// Columns are located by name ("Cost"/"PreTaxCost" and "UsageDate"/"BillingMonth");
    /// rows whose date cannot be parsed are skipped.
    /// </summary>
    /// <param name="json">Response body (JSON only, transport header lines already removed).</param>
    /// <returns>The series ordered by date and a null error, or an empty series and an error message.</returns>
    private static (List<DailyPoint> series, string? error) ParseDailyCosts(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            if (!doc.RootElement.TryGetProperty("properties", out var props)) return (new(), "missing 'properties'");
            if (!props.TryGetProperty("rows", out var rows)) return (new(), "missing 'rows'");
            if (!props.TryGetProperty("columns", out var cols)) return (new(), "missing 'columns'");

            int costIdx = -1, dateIdx = -1, i = 0;
            foreach (var c in cols.EnumerateArray())
            {
                var name = c.GetProperty("name").GetString() ?? "";
                if (IsCostColumn(name)) costIdx = i;
                if (name.Equals("UsageDate", StringComparison.OrdinalIgnoreCase) || name.Equals("BillingMonth", StringComparison.OrdinalIgnoreCase)) dateIdx = i;
                i++;
            }
            if (costIdx < 0 || dateIdx < 0) return (new(), $"could not locate Cost/UsageDate columns (got {i} columns)");

            var series = new List<DailyPoint>();
            foreach (var row in rows.EnumerateArray())
            {
                var cost = row[costIdx].GetDouble();
                if (TryReadDate(row[dateIdx], out var date))
                {
                    series.Add(new DailyPoint(date, cost));
                }
            }
            return (series.OrderBy(p => p.Date).ToList(), null);
        }
        catch (Exception ex)
        {
            // Any shape mismatch (wrong value kind, short row, malformed JSON) is reported to the
            // caller as text rather than thrown, because the caller returns it to the model.
            return (new(), ex.Message);
        }
    }

    /// <summary>
    /// Fetches the cost of a single day grouped by <paramref name="groupBy"/> and returns the
    /// largest contributors.
    /// </summary>
    /// <returns>
    /// A list of <c>{ name, cost }</c> objects (at most <see cref="MaxContributors"/>), or an
    /// <c>{ error, detail }</c> object when the call or the parsing fails. Never throws for
    /// response-shape problems: the breakdown is best-effort decoration of an anomaly.
    /// </returns>
    private static async Task<object> GetBreakdownForDay(string token, string subId, DateTime day, string groupBy, System.Diagnostics.Activity? activity)
    {
        var body = JsonSerializer.Serialize(new
        {
            type = "ActualCost",
            timeframe = "Custom",
            timePeriod = new { from = FormatDate(day), to = FormatDate(day) },
            dataset = new
            {
                granularity = "None",
                aggregation = new { totalCost = new { name = "Cost", function = "Sum" } },
                grouping = new[] { new { type = "Dimension", name = groupBy } },
                sorting = new[] { new { direction = "descending", name = "Cost" } }
            }
        });
        var resp = await HttpHelper.SendWithRetryAsync(BuildQueryUrl(subId), token, activity, "anomaly.breakdown",
            method: HttpMethod.Post, jsonBody: body);

        if (!resp.StartsWith("HTTP 200", StringComparison.Ordinal))
            return new { error = "could not fetch breakdown", detail = Truncate(resp, 300) };

        var json = ExtractJsonBody(resp);

        try
        {
            using var doc = JsonDocument.Parse(json);
            var props = doc.RootElement.GetProperty("properties");
            var rows = props.GetProperty("rows");

            // Default to the historical positional layout ([cost, group, ...]) and refine it from
            // the column metadata when the response provides it.
            int costIdx = 0, nameIdx = 1;
            if (props.TryGetProperty("columns", out var cols) && cols.ValueKind == JsonValueKind.Array)
            {
                var i = 0;
                foreach (var c in cols.EnumerateArray())
                {
                    var colName = c.TryGetProperty("name", out var n) && n.ValueKind == JsonValueKind.String ? n.GetString() ?? "" : "";
                    if (IsCostColumn(colName)) costIdx = i;
                    else if (colName.Equals(groupBy, StringComparison.OrdinalIgnoreCase)) nameIdx = i;
                    i++;
                }
            }

            var contributors = new List<(string Name, double Cost)>();
            foreach (var row in rows.EnumerateArray())
            {
                var cell = row[nameIdx];
                var name = cell.ValueKind switch
                {
                    JsonValueKind.String => cell.GetString() ?? "?",
                    JsonValueKind.Null => "?",
                    _ => cell.GetRawText(),
                };
                contributors.Add((name, row[costIdx].GetDouble()));
            }

            // Sort locally as well: OrderByDescending is stable, so an already-sorted response
            // keeps its order, and an unsorted one still yields the true top contributors.
            return contributors
                .OrderByDescending(c => c.Cost)
                .Take(MaxContributors)
                .Select(c => (object)new { name = c.Name, cost = Math.Round(c.Cost, 2) })
                .ToList();
        }
        catch (Exception ex)
        {
            return new { error = "parse failed", detail = ex.Message };
        }
    }

    /// <summary>Builds the Cost Management query endpoint for a subscription.</summary>
    private static string BuildQueryUrl(string subscriptionId) =>
        $"https://management.azure.com/subscriptions/{subscriptionId}/providers/Microsoft.CostManagement/query?api-version=2025-03-01";

    /// <summary>Formats a date as yyyy-MM-dd using the invariant culture, so the calendar of the host culture cannot change the year.</summary>
    private static string FormatDate(DateTime date) =>
        date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    /// <summary>Returns at most <paramref name="maxLength"/> leading characters of <paramref name="text"/>.</summary>
    private static string Truncate(string text, int maxLength) =>
        text[..Math.Min(text.Length, maxLength)];

    /// <summary>True for the column names under which the aggregated cost is returned.</summary>
    private static bool IsCostColumn(string name) =>
        name.Equals("Cost", StringComparison.OrdinalIgnoreCase) || name.Equals("PreTaxCost", StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Removes the first line of the helper's response text (the "HTTP 200 ..." status line) and,
    /// when present, a following "Current UTC time:" line, leaving the JSON payload.
    /// </summary>
    private static string ExtractJsonBody(string response)
    {
        // IndexOf returns -1 when there is no newline, which makes the slice start at 0 (no-op).
        var payload = response[(response.IndexOf('\n') + 1)..];
        if (payload.StartsWith("Current UTC time:", StringComparison.Ordinal))
        {
            payload = payload[(payload.IndexOf('\n') + 1)..];
        }
        return payload;
    }

    /// <summary>
    /// Reads a date cell that is either a number such as 20240115 or a date/time string.
    /// Parsing is culture-invariant, and strings carrying an offset are normalized to UTC before
    /// the date part is taken so the host time zone cannot shift the day.
    /// </summary>
    private static bool TryReadDate(JsonElement cell, out DateTime date)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        string raw;
        if (cell.ValueKind == JsonValueKind.Number)
        {
            raw = cell.TryGetInt64(out var number) ? number.ToString(invariant) : cell.GetRawText();
        }
        else
        {
            raw = cell.GetString() ?? "";
        }

        if (DateTime.TryParseExact(raw, "yyyyMMdd", invariant, System.Globalization.DateTimeStyles.None, out var parsed)
            || DateTime.TryParse(raw, invariant,
                System.Globalization.DateTimeStyles.AssumeUniversal | System.Globalization.DateTimeStyles.AdjustToUniversal,
                out parsed))
        {
            date = parsed.Date;
            return true;
        }

        date = default;
        return false;
    }

    /// <summary>Produces the one-sentence, culture-invariant explanation returned in the <c>summary</c> field.</summary>
    private static string BuildSummary(bool baselineIsFlat, int spikes, int drops, int detectionDays, double mean, double stddev, double zThreshold)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        if (baselineIsFlat)
        {
            return string.Format(invariant,
                "Baseline is flat (stddev {0:F4} is below {1}), so z-score detection was skipped; compare recent_daily_costs with the baseline mean {2:F2} manually.",
                stddev, MinStdDev, mean);
        }

        if (spikes + drops == 0)
        {
            return string.Format(invariant,
                "No anomalies: all {0} day(s) in the detection window are within {1} standard deviations of the baseline mean {2:F2}.",
                detectionDays, zThreshold, mean);
        }

        return string.Format(invariant,
            "Found {0} anomalous day(s) ({1} spike(s), {2} drop(s)) among the last {3} day(s); baseline mean {4:F2}, stddev {5:F2}, z-threshold {6}.",
            spikes + drops, spikes, drops, detectionDays, mean, stddev, zThreshold);
    }
}
0 commit comments