-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathutils.rs
More file actions
396 lines (348 loc) · 13.1 KB
/
utils.rs
File metadata and controls
396 lines (348 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
use std::collections::{BTreeMap, HashSet};
use std::sync::OnceLock;
use anyhow::Result;
use chrono::{DateTime, Datelike, Local, Utc};
use num_format::{Locale, ToFormattedString};
use parking_lot::Mutex;
use serde::{Deserialize, Deserializer};
use sha2::{Digest, Sha256};
use xxhash_rust::xxh3::xxh3_64;
use crate::types::{CompactDate, ConversationMessage, DailyStats, ModelStats};
/// Process-wide set of warning texts that have already been printed.
///
/// Created lazily on first use and shared by `warn_once` and
/// `warn_once_yellow`, so a given message text is printed at most once
/// regardless of which of the two functions is asked to print it.
static WARNED_MESSAGES: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
/// Prints `message` to stderr the first time it is seen and stays silent on
/// every later call with the same text.
///
/// The set of already-printed messages lives in `WARNED_MESSAGES`, which is
/// also used by `warn_once_yellow`; a message printed by either function is
/// suppressed by both afterwards.
pub fn warn_once(message: impl Into<String>) {
    let message: String = message.into();
    let registry = WARNED_MESSAGES.get_or_init(|| Mutex::new(HashSet::new()));

    // `insert` reports whether the text was new. The lock guard is a temporary
    // of this statement, so it is released before anything is printed.
    let first_occurrence = registry.lock().insert(message.clone());
    if !first_occurrence {
        return;
    }

    eprintln!("{message}");
}
/// Same once-only behaviour as `warn_once`, but the line is rendered in
/// yellow and prefixed with a warning emoji.
///
/// Shares the `WARNED_MESSAGES` set with `warn_once`, so a text already
/// printed by either function is not printed again.
pub fn warn_once_yellow(message: impl Into<String>) {
    let message: String = message.into();
    let registry = WARNED_MESSAGES.get_or_init(|| Mutex::new(HashSet::new()));

    // The guard is dropped at the end of this statement, before printing.
    let first_occurrence = registry.lock().insert(message.clone());
    if !first_occurrence {
        return;
    }

    // ANSI escape codes: \x1b[33m switches to yellow, \x1b[0m resets.
    eprintln!("\x1b[33m⚠️ {message}\x1b[0m");
}
/// User-facing settings that control how `format_number` renders integers.
#[derive(Clone, Debug)]
pub struct NumberFormatOptions {
    /// Insert locale-specific thousands separators. Only consulted when
    /// `use_human` is `false`.
    pub use_comma: bool,
    /// Abbreviate large values with a `k` / `m` / `b` / `t` suffix. Takes
    /// precedence over `use_comma`.
    pub use_human: bool,
    /// Language code selecting the separator style (`"de"`, `"fr"`, `"es"`,
    /// `"it"`, `"ja"`, `"ko"`, `"zh"`); any other value falls back to English.
    pub locale: String,
    /// Number of digits after the decimal point in abbreviated output.
    pub decimal_places: usize,
}
/// Renders a number for display according to `options`.
///
/// Accepts any integer type that converts losslessly into `u64` (e.g. `u32`
/// or `u64`). The output style is chosen in this order:
///
/// 1. `use_human`: values of at least one thousand are divided by the largest
///    fitting power of 1000 and suffixed with `k`, `m`, `b` or `t`, printed
///    with `decimal_places` fractional digits. Smaller values are printed as
///    plain digits. Because the scaled value is rounded when printed, a value
///    just below a unit boundary can render as e.g. `1000.0k`.
/// 2. `use_comma`: digits are grouped using the separators of `locale`
///    (unrecognised locale codes fall back to English).
/// 3. Otherwise: plain digits.
pub fn format_number(n: impl Into<u64>, options: &NumberFormatOptions) -> String {
    let value: u64 = n.into();

    if options.use_human {
        let precision = options.decimal_places;
        let scaled = |unit: f64| value as f64 / unit;

        // Arms are ordered from the largest unit down; the first match wins.
        return match value {
            v if v >= 1_000_000_000_000 => {
                format!("{:.prec$}t", scaled(1_000_000_000_000.0), prec = precision)
            }
            v if v >= 1_000_000_000 => {
                format!("{:.prec$}b", scaled(1_000_000_000.0), prec = precision)
            }
            v if v >= 1_000_000 => {
                format!("{:.prec$}m", scaled(1_000_000.0), prec = precision)
            }
            v if v >= 1_000 => {
                format!("{:.prec$}k", scaled(1_000.0), prec = precision)
            }
            _ => value.to_string(),
        };
    }

    if !options.use_comma {
        return value.to_string();
    }

    // The locale only matters for grouped output, so resolve it here.
    let locale = match options.locale.as_str() {
        "de" => Locale::de,
        "fr" => Locale::fr,
        "es" => Locale::es,
        "it" => Locale::it,
        "ja" => Locale::ja,
        "ko" => Locale::ko,
        "zh" => Locale::zh,
        _ => Locale::en,
    };
    value.to_formatted_string(&locale)
}
/// Format a number to fit within a given column width.
///
/// Falls back to progressively more compact representations if the user's
/// preferred format (commas, plain digits, etc.) overflows the column. The
/// first candidate that fits is returned:
/// 1. User's preferred format
/// 2. Human-readable with configured decimal places (e.g. "193.1m")
/// 3. Human-readable with fewer decimal places (e.g. "193m")
/// 4. Plain digits (no separators)
/// 5. Last resort: human-readable with zero decimal places, returned even if
///    it still exceeds `max_width`
///
/// This ensures that the most significant digits are never clipped by
/// ratatui's column rendering — instead the number is abbreviated.
///
/// `max_width` is measured in characters rather than bytes, so a grouping
/// separator that is encoded as several UTF-8 bytes still counts as a single
/// column and does not force an unnecessary fallback.
pub fn format_number_fit(
    n: impl Into<u64>,
    options: &NumberFormatOptions,
    max_width: usize,
) -> String {
    let n: u64 = n.into();

    // Every candidate this function produces consists of digits, separators
    // and a one-letter suffix, so one `char` corresponds to one column.
    let fits = |text: &str| text.chars().count() <= max_width;

    // Abbreviated rendering with the caller's locale and a chosen precision.
    let human_with = |decimal_places: usize| {
        let human_options = NumberFormatOptions {
            use_human: true,
            use_comma: false,
            locale: options.locale.clone(),
            decimal_places,
        };
        format_number(n, &human_options)
    };

    // 1. Try the user's preferred format first
    let preferred = format_number(n, options);
    if fits(&preferred) {
        return preferred;
    }

    // 2. Try human-readable with configured decimal places
    let human = human_with(options.decimal_places);
    if fits(&human) {
        return human;
    }

    // 3. Try human-readable with progressively fewer decimal places
    for dp in (0..options.decimal_places).rev() {
        let compact = human_with(dp);
        if fits(&compact) {
            return compact;
        }
    }

    // 4. Fall back to plain digits (no separators)
    let plain = n.to_string();
    if fits(&plain) {
        return plain;
    }

    // 5. Last resort: human-readable with 0 decimal places (should always be short)
    human_with(0)
}
/// Converts a `YYYY-MM-DD` date string into `M/D/YYYY` for display.
///
/// * The literal `"unknown"` becomes `"Unknown"`.
/// * A string that does not parse as `%Y-%m-%d` is returned unchanged.
/// * Month and day are printed without zero padding.
/// * If the date equals today's date in the local timezone, a trailing `*`
///   is appended.
pub fn format_date_for_display(date: &str) -> String {
    if date == "unknown" {
        return "Unknown".to_string();
    }

    let Ok(parsed) = chrono::NaiveDate::parse_from_str(date, "%Y-%m-%d") else {
        // Not a recognised date; show the input verbatim.
        return date.to_string();
    };

    // Numeric accessors give the non-padded month and day.
    let (month, day, year) = (parsed.month(), parsed.day(), parsed.year());
    let mut label = format!("{month}/{day}/{year}");

    // Mark today's date with an asterisk.
    if parsed == chrono::Local::now().date_naive() {
        label.push('*');
    }
    label
}
// TODO: Don't use strings here, wasteful.
/// Groups messages by local calendar day and accumulates per-day statistics.
///
/// Each message is assigned to the day of its timestamp converted to the
/// local timezone, keyed as a `YYYY-MM-DD` string. For every day:
///
/// * messages with a model are counted as AI messages, counted per model,
///   and have their cost, token counts and tool calls added to the day's
///   totals (the integer counters use saturating addition) as well as to the
///   per-model stats;
/// * messages without a model are counted as user messages only;
/// * a conversation is counted once, on the earliest day on which any of its
///   messages appears.
///
/// When the result is non-empty, every day between the earliest day in the
/// data and the later of (latest day in the data, today) that has no messages
/// is inserted with default stats. If a map key cannot be parsed back into a
/// date, the map is returned without gap filling. An empty `entries` slice
/// yields an empty map.
pub fn aggregate_by_date(entries: &[ConversationMessage]) -> BTreeMap<String, DailyStats> {
let mut daily_stats: BTreeMap<String, DailyStats> = BTreeMap::new();
// Maps conversation hash -> earliest local day (as a string) seen for it.
let mut conversation_start_dates: BTreeMap<String, String> = BTreeMap::new();
for entry in entries {
// Bucket by the local-timezone day, not the UTC day.
let timestamp = &entry.date.with_timezone(&Local);
let conversation_hash = &entry.conversation_hash;
let date = timestamp.format("%Y-%m-%d").to_string();
// Only update if this is earlier than what we've seen, or if we haven't seen this
// conversation before. This is to handle the case where a conversation spans
// multiple days, we'd want to ascribe it to the day on which it was started.
// (Comparing the strings works because the format sorts chronologically.)
conversation_start_dates
.entry(conversation_hash.clone())
.and_modify(|existing_date| {
if date < *existing_date {
*existing_date = date.clone();
}
})
.or_insert(date.clone());
// Fetch the bucket for this day, creating it on first sight.
let daily_stats_entry = daily_stats
.entry(date.clone())
.or_insert_with(|| DailyStats {
date: CompactDate::from_local(&entry.date),
..Default::default()
});
match &entry.model {
Some(model) => {
// AI message
daily_stats_entry.ai_messages += 1;
// Per-model message count for this day.
*daily_stats_entry
.models
.entry(model.to_string())
.or_insert(0) += 1;
// Aggregate TUI-relevant stats only (TuiStats has 6 fields)
daily_stats_entry.stats.add_cost(entry.stats.cost);
// Saturating adds keep the counters pinned at their maximum instead of overflowing.
daily_stats_entry.stats.input_tokens = daily_stats_entry
.stats
.input_tokens
.saturating_add(entry.stats.input_tokens);
daily_stats_entry.stats.output_tokens = daily_stats_entry
.stats
.output_tokens
.saturating_add(entry.stats.output_tokens);
daily_stats_entry.stats.reasoning_tokens = daily_stats_entry
.stats
.reasoning_tokens
.saturating_add(entry.stats.reasoning_tokens);
daily_stats_entry.stats.cached_tokens = daily_stats_entry
.stats
.cached_tokens
.saturating_add(entry.stats.cached_tokens);
daily_stats_entry.stats.tool_calls = daily_stats_entry
.stats
.tool_calls
.saturating_add(entry.stats.tool_calls);
// Aggregate per-model stats for JSON output
daily_stats_entry
.model_stats
.entry(model.to_string())
.or_insert_with(|| ModelStats::new(model.to_string()))
.add_message(&entry.stats);
}
None => {
// User message - no TUI-relevant stats to aggregate
daily_stats_entry.user_messages += 1;
}
};
}
// Track conversations started on each date and update daily stats
for start_date in conversation_start_dates.values() {
if let Some(daily_stats_entry) = daily_stats.get_mut(start_date) {
daily_stats_entry.conversations += 1;
}
}
// If there are any gaps (days Claude Code wasn't run) fill them in with
// empty stats. (TODO: This should be a utility.)
if !daily_stats.is_empty() {
let mut filled_stats = BTreeMap::new();
// The map is non-empty here, so min()/max() always yield a key.
let earliest_date = daily_stats.keys().min().unwrap();
let today_str = chrono::Local::now()
.date_naive()
.format("%Y-%m-%d")
.to_string();
let latest_date = daily_stats.keys().max().unwrap().max(&today_str); // Either today or the highest date in data.
let start_date = match chrono::NaiveDate::parse_from_str(earliest_date, "%Y-%m-%d") {
Ok(date) => date,
Err(_) => return daily_stats, // Ignore: return the stats without gap filling.
};
let end_date = match chrono::NaiveDate::parse_from_str(latest_date, "%Y-%m-%d") {
Ok(date) => date,
Err(_) => return daily_stats, // Ignore: return the stats without gap filling.
};
// Fill in the gaps by walking every day in the inclusive range.
let mut current_date = start_date;
while current_date <= end_date {
let date_str = current_date.format("%Y-%m-%d").to_string();
if let Some(existing_stats) = daily_stats.get(&date_str) {
// Day has data: carry it over unchanged.
filled_stats.insert(date_str, existing_stats.clone());
} else {
// Day has no data: insert a default entry for that date.
filled_stats.insert(
date_str.clone(),
DailyStats {
date: CompactDate::from_str(&date_str).unwrap_or_default(),
..Default::default()
},
);
}
current_date += chrono::Duration::days(1);
}
return filled_stats;
}
// No messages at all: nothing to fill.
daily_stats
}
/// Keeps only the messages whose timestamp is at or after `date`.
///
/// `date` is compared against each message's timestamp in milliseconds
/// since the Unix epoch. The relative order of the surviving messages is
/// preserved. This function has no failure path and always returns `Ok`.
pub async fn get_messages_later_than(
    date: i64,
    messages: Vec<ConversationMessage>,
) -> Result<Vec<ConversationMessage>> {
    let recent = messages
        .into_iter()
        .filter(|message| message.date.timestamp_millis() >= date)
        .collect();
    Ok(recent)
}
/// Returns the subset of `messages` whose cost is zero, or close enough to
/// zero that the difference is floating-point noise.
///
/// A message is kept when the absolute value of its cost is below `1e-10`.
/// The relative order of the kept messages is preserved.
pub fn filter_zero_cost_messages(messages: Vec<ConversationMessage>) -> Vec<ConversationMessage> {
    // Tolerance for treating a floating-point cost as zero.
    const EPSILON: f64 = 1e-10;

    let mut free_messages = Vec::new();
    for message in messages {
        if message.stats.cost.abs() < EPSILON {
            free_messages.push(message);
        }
    }
    free_messages
}
/// Returns the SHA-256 digest of `text` as a lowercase hexadecimal string.
pub fn hash_text(text: &str) -> String {
    // One-shot hashing: equivalent to new() + update() + finalize().
    let digest = Sha256::digest(text.as_bytes());
    format!("{digest:x}")
}
/// Fast non-cryptographic hash intended for local deduplication only.
///
/// Produces the 64-bit XXH3 hash of `text` as a 16-character, zero-padded,
/// lowercase hexadecimal string. NOT for cloud use: use `hash_text` when
/// computing a `global_hash`.
pub fn fast_hash(text: &str) -> String {
    let digest: u64 = xxh3_64(text.as_bytes());
    format!("{digest:016x}")
}
/// Sequential deduplication by global_hash using HashSet.
/// Used for post-init processing (incremental updates, uploads).
pub fn deduplicate_by_global_hash(messages: Vec<ConversationMessage>) -> Vec<ConversationMessage> {
use std::collections::HashSet;
let mut seen: HashSet<String> = HashSet::with_capacity(messages.len() / 2);
messages
.into_iter()
.filter(|msg| seen.insert(msg.global_hash.clone()))
.collect()
}
/// Sequential deduplication by local_hash using HashSet.
/// Messages without local_hash are always kept.
/// Used for post-init processing (incremental updates, uploads).
pub fn deduplicate_by_local_hash(messages: Vec<ConversationMessage>) -> Vec<ConversationMessage> {
use std::collections::HashSet;
let mut seen: HashSet<String> = HashSet::with_capacity(messages.len() / 2);
messages
.into_iter()
.filter(|msg| {
if let Some(local_hash) = &msg.local_hash {
seen.insert(local_hash.clone())
} else {
true // Always keep messages without local_hash
}
})
.collect()
}
/// Custom serde deserializer for RFC3339 timestamp strings to `DateTime<Utc>`
pub fn deserialize_utc_timestamp<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
DateTime::parse_from_rfc3339(&s)
.map(|dt| dt.into())
.map_err(serde::de::Error::custom)
}
/// Returns the system's local timezone as an IANA name
/// (e.g., "America/Chicago").
///
/// Falls back to `"UTC"` when the timezone cannot be determined.
pub fn get_local_timezone() -> String {
    match iana_time_zone::get_timezone() {
        Ok(zone) => zone,
        Err(_) => "UTC".to_string(),
    }
}
// Unit tests for this module are kept in a separate `tests` module file and
// are only compiled for test builds.
#[cfg(test)]
mod tests;