|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Fix cumulative database by properly tracking what was already counted |
| 4 | +""" |
| 5 | + |
| 6 | +import sys |
| 7 | +import io |
| 8 | +import json |
| 9 | +import hashlib |
| 10 | +from datetime import datetime, timezone |
| 11 | +from pathlib import Path |
| 12 | + |
# Set UTF-8 encoding for console output on Windows (presumably so the emoji
# used in the status messages can be printed under legacy code pages).
if sys.platform == 'win32':
    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Change the encoding in place: the stream object and its other
            # settings (e.g. line buffering) are kept, and no second wrapper
            # is stacked on the same underlying buffer.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # The stream was replaced by an object without reconfigure();
            # fall back to wrapping its binary buffer.
            setattr(sys, _stream_name, io.TextIOWrapper(_stream.buffer, encoding='utf-8'))
        # Otherwise (e.g. the stream is None) there is nothing to configure.
| 17 | + |
# Paths: root of the per-project session logs and the cumulative usage database
PROJECT_DIR = Path.home().joinpath(".claude", "projects")
CUMULATIVE_DB = Path.home().joinpath(".claude", "cumulative_usage.json")

# Historical peak (Nov 21, 2025 22:04), expressed in UTC.
# Entries stamped before CUTOFF_DATE are ignored by the scan; entries at or
# before PEAK_DATE are treated as already included in HISTORICAL_PEAK.
PEAK_DATE = datetime(year=2025, month=11, day=21, hour=22, minute=4, second=0, tzinfo=timezone.utc)
CUTOFF_DATE = datetime(year=2025, month=10, day=1, tzinfo=timezone.utc)

# Totals recorded at the peak; newer sessions are added on top of these.
HISTORICAL_PEAK = dict(
    input_tokens=122_396,
    output_tokens=2_615_355,
    cache_creation_tokens=45_201_785,
    cache_read_tokens=351_620_275,
    total_sessions=5_035,
)
| 33 | + |
def create_session_id(file_path, timestamp, usage_data):
    """Return a deterministic MD5 hex digest identifying one usage entry.

    The digest is computed over the file's base name, the timestamp as given,
    and the entry's ``input_tokens`` / ``output_tokens`` values (0 when the
    key is absent), joined with underscores.
    """
    tokens_in = usage_data.get('input_tokens', 0)
    tokens_out = usage_data.get('output_tokens', 0)
    fingerprint = f"{file_path.name}_{timestamp}_{tokens_in}_{tokens_out}"
    digest = hashlib.md5(fingerprint.encode())
    return digest.hexdigest()
| 38 | + |
def _parse_usage_entry(line, jsonl_file):
    """Parse one JSONL line into ``(session_id, timestamp, session_data)``.

    Returns ``None`` when the line is not valid JSON, is not a JSON object
    with a timezone-aware ISO-8601 ``timestamp`` string, or carries no
    ``message.usage`` mapping.
    """
    try:
        data = json.loads(line)
    except json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        return None

    timestamp_str = data.get('timestamp')
    if not isinstance(timestamp_str, str):
        return None
    try:
        # fromisoformat() before Python 3.11 rejects a trailing 'Z'.
        timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
    except ValueError:
        return None
    if timestamp.tzinfo is None:
        # A naive value cannot be ordered against the aware cutoff/peak dates.
        return None

    message = data.get('message')
    if not isinstance(message, dict):
        return None
    usage = message.get('usage')
    if not isinstance(usage, dict):
        return None

    session_data = {
        "file": jsonl_file.name,
        "timestamp": timestamp_str,
        # "or 0" also maps an explicit JSON null to zero so the values can
        # always be summed later.
        "input_tokens": usage.get('input_tokens') or 0,
        "output_tokens": usage.get('output_tokens') or 0,
        "cache_creation_tokens": usage.get('cache_creation_input_tokens') or 0,
        "cache_read_tokens": usage.get('cache_read_input_tokens') or 0,
    }
    session_id = create_session_id(jsonl_file, timestamp_str, usage)
    return session_id, timestamp, session_data


def scan_and_mark_old_sessions():
    """Scan all session logs and split usage entries around the peak date.

    Every ``*.jsonl`` file below PROJECT_DIR is read line by line. Entries
    stamped before CUTOFF_DATE are ignored; the rest are keyed by their
    session ID and placed in the "old" dict (timestamp <= PEAK_DATE) or the
    "new" dict (timestamp > PEAK_DATE). A malformed line is skipped on its
    own and does not affect the remaining lines of the file; a file that
    cannot be read is reported on stderr and skipped.

    Returns:
        tuple: ``(old_sessions, new_sessions)``, each mapping session ID to a
        dict with ``file``, ``timestamp`` and the four token counters.
    """
    print("🔍 Scanning for sessions before Nov 21, 2025 22:04...")
    print()

    old_sessions = {}  # Sessions at or before the peak
    new_sessions = {}  # Sessions after the peak

    for jsonl_file in PROJECT_DIR.glob("**/*.jsonl"):
        try:
            with open(jsonl_file, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue

                    parsed = _parse_usage_entry(line, jsonl_file)
                    if parsed is None:
                        continue
                    session_id, timestamp, session_data = parsed

                    # Skip sessions before cutoff
                    if timestamp < CUTOFF_DATE:
                        continue

                    bucket = old_sessions if timestamp <= PEAK_DATE else new_sessions
                    bucket[session_id] = session_data
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: keep scanning the other files, but say what was lost.
            print(f"Warning: skipping unreadable file {jsonl_file}: {e}", file=sys.stderr)
            continue

    # Report unique session IDs so the figures match the returned dicts
    # (repeated IDs overwrite each other and must not be counted twice).
    print(f"✅ Found {len(old_sessions):,} sessions BEFORE peak (Nov 21)")
    print(f"✅ Found {len(new_sessions):,} sessions AFTER peak")
    print()

    return old_sessions, new_sessions
| 105 | + |
def calculate_new_tokens(new_sessions):
    """Sum the four token counters over every entry in *new_sessions*.

    Returns a dict with the keys ``input_tokens``, ``output_tokens``,
    ``cache_creation_tokens`` and ``cache_read_tokens``; every value is 0
    when *new_sessions* is empty.
    """
    counter_names = (
        "input_tokens",
        "output_tokens",
        "cache_creation_tokens",
        "cache_read_tokens",
    )
    entries = list(new_sessions.values())
    return {name: sum(entry[name] for entry in entries) for name in counter_names}
| 122 | + |
def create_fixed_database(old_sessions, new_sessions, new_tokens):
    """Build the rebuilt database dict: historical peak plus newer sessions.

    Args:
        old_sessions: session ID -> entry for sessions at or before the peak.
        new_sessions: session ID -> entry for sessions after the peak.
        new_tokens: token totals of *new_sessions*, keyed by the four
            ``*_tokens`` counter names.

    Returns:
        dict: the database structure with ``cumulative_usage`` (peak totals
        plus *new_tokens*, and peak session count plus ``len(new_sessions)``),
        ``processed_sessions`` (old and new merged) and a one-entry
        ``run_history``.
    """
    # One timestamp for the whole record, so created_at, last_updated and the
    # run-history entry agree exactly.
    now_iso = datetime.now(timezone.utc).isoformat()

    # Combine old and new processed sessions
    all_processed = {**old_sessions, **new_sessions}

    # Calculate cumulative totals (peak + new)
    token_keys = (
        "input_tokens",
        "output_tokens",
        "cache_creation_tokens",
        "cache_read_tokens",
    )
    cumulative = {key: HISTORICAL_PEAK[key] + new_tokens[key] for key in token_keys}
    cumulative["total_sessions"] = HISTORICAL_PEAK["total_sessions"] + len(new_sessions)

    return {
        "created_at": now_iso,
        "last_updated": now_iso,
        # Derived from the module constants so they cannot drift from the
        # dates the scan actually uses ("2025-10-01" / "2025-11-21" today).
        "period_start": CUTOFF_DATE.date().isoformat(),
        "restored_from_peak": PEAK_DATE.date().isoformat(),
        "cumulative_usage": cumulative,
        "processed_sessions": all_processed,
        "run_history": [
            {
                "timestamp": now_iso,
                "action": "fixed_database_with_proper_session_tracking",
                "peak_sessions": HISTORICAL_PEAK["total_sessions"],
                "new_sessions": len(new_sessions),
                "total_sessions": cumulative["total_sessions"],
            }
        ],
    }
| 157 | + |
def main():
    """Rebuild the cumulative usage database and print a summary.

    Steps: back up the existing database file (if any), scan the session
    logs, total the tokens of sessions newer than the peak, print the
    resulting totals with an estimated cost, and write the rebuilt database
    to CUMULATIVE_DB.
    """
    import shutil  # local import: only needed for the backup copy

    print()
    print("=" * 70)
    print("🔧 FIXING CUMULATIVE DATABASE")
    print("=" * 70)
    print()

    # Backup current
    if CUMULATIVE_DB.exists():
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_file = CUMULATIVE_DB.parent / f"cumulative_usage_before_fix_{stamp}.json"
        # Byte-for-byte copy: unlike parsing and re-dumping the JSON, this
        # still works when the current file is corrupt.
        shutil.copy2(CUMULATIVE_DB, backup_file)
        print(f"✅ Backed up current database to: {backup_file}")
        print()

    # Scan sessions
    old_sessions, new_sessions = scan_and_mark_old_sessions()

    # Calculate new tokens
    new_tokens = calculate_new_tokens(new_sessions)

    print("📊 New sessions since Nov 21:")
    print(f" Count: {len(new_sessions):,}")
    print(f" Input: {new_tokens['input_tokens']:,}")
    print(f" Output: {new_tokens['output_tokens']:,}")
    print(f" Cache Creation: {new_tokens['cache_creation_tokens']:,}")
    print(f" Cache Read: {new_tokens['cache_read_tokens']:,}")
    print()

    # Create fixed database
    db = create_fixed_database(old_sessions, new_sessions, new_tokens)

    cumulative = db["cumulative_usage"]
    # Cache reads are left out of this figure; they only enter the cost
    # estimate below.
    total_processed = (
        cumulative["input_tokens"]
        + cumulative["output_tokens"]
        + cumulative["cache_creation_tokens"]
    )

    print("=" * 70)
    print("✅ FIXED CUMULATIVE TOTALS")
    print("=" * 70)
    print()
    print(f"📊 Total Sessions: {cumulative['total_sessions']:,}")
    print()
    print("🔢 TOKEN TOTALS:")
    print(f" Input: {cumulative['input_tokens']:,}")
    print(f" Output: {cumulative['output_tokens']:,}")
    print(f" Cache Creation: {cumulative['cache_creation_tokens']:,}")
    print(f" Cache Read: {cumulative['cache_read_tokens']:,}")
    print()
    print(f"💰 TOTAL PROCESSED: {total_processed:,} tokens ({total_processed/1_000_000:.2f}M)")
    print()

    # Calculate cost (rates are USD per million tokens).
    # NOTE(review): the rates are hard-coded; confirm they match the model
    # and price list actually in use.
    input_cost = (cumulative['input_tokens'] / 1_000_000) * 3.0
    output_cost = (cumulative['output_tokens'] / 1_000_000) * 15.0
    cache_write_cost = (cumulative['cache_creation_tokens'] / 1_000_000) * 3.75
    cache_read_cost = (cumulative['cache_read_tokens'] / 1_000_000) * 0.30
    total_cost = input_cost + output_cost + cache_write_cost + cache_read_cost

    print(f"💵 ESTIMATED COST: ${total_cost:.2f}")
    print()
    print("=" * 70)

    # Save database: write to a sibling temp file first and then swap it in,
    # so an interrupted write cannot leave a truncated database behind.
    CUMULATIVE_DB.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = CUMULATIVE_DB.with_name(CUMULATIVE_DB.name + ".tmp")
    with open(tmp_file, 'w', encoding='utf-8') as f:
        json.dump(db, f, indent=2, ensure_ascii=False)
    tmp_file.replace(CUMULATIVE_DB)

    print()
    print(f"✅ Database saved to: {CUMULATIVE_DB}")
    print()
    print("🎉 Database is now FIXED and properly tracking all sessions!")
    print()


if __name__ == "__main__":
    main()
0 commit comments