|
3 | 3 | // For the full copyright and license information, please view the LICENSE |
4 | 4 | // file that was distributed with this source code. |
5 | 5 |
|
6 | | -// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST |
| 6 | +// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST foobarbaz |
7 | 7 |
|
8 | 8 | mod locale; |
9 | 9 |
|
10 | 10 | use clap::{Arg, ArgAction, Command}; |
11 | 11 | use jiff::fmt::strtime; |
12 | 12 | use jiff::tz::{TimeZone, TimeZoneDatabase}; |
13 | 13 | use jiff::{Timestamp, Zoned}; |
| 14 | +use std::borrow::Cow; |
14 | 15 | use std::collections::HashMap; |
15 | 16 | use std::fs::File; |
16 | 17 | use std::io::{BufRead, BufReader, BufWriter, Write}; |
@@ -130,6 +131,52 @@ enum DayDelta { |
130 | 131 | Next, |
131 | 132 | } |
132 | 133 |
|
/// Remove parenthesized comments from a date string (GNU `date` compatibility).
///
/// A `(` opens a comment that runs up to its matching `)`; nested parentheses
/// are tracked, and the whole group (delimiters included) is dropped from the
/// output. When a `(` is never closed, the remainder of the input is dropped.
/// A `)` that appears outside any comment is kept unchanged.
///
/// Returns `Cow::Borrowed` when the input contains no `(`, so the common case
/// does not allocate; otherwise returns a freshly built `Cow::Owned` string.
///
/// Examples:
/// - "2026(comment)-01-05" -> "2026-01-05"
/// - "1(ignore comment to eol" -> "1"
/// - "(" -> ""
/// - "((foo)2026-01-05)" -> ""
fn strip_parenthesized_comments(input: &str) -> Cow<'_, str> {
    let first_open = match input.find('(') {
        Some(idx) => idx,
        // No comment present: hand the input back untouched.
        None => return Cow::Borrowed(input),
    };

    let mut kept = String::with_capacity(input.len());
    // Everything before the first '(' is outside any comment.
    kept.push_str(&input[..first_open]);

    // Current comment nesting level; 0 means "outside any comment".
    let mut nesting: usize = 0;
    for ch in input[first_open..].chars() {
        match ch {
            '(' => nesting += 1,
            ')' if nesting > 0 => nesting -= 1,
            // Outside a comment every character (stray ')' included) is kept.
            _ if nesting == 0 => kept.push(ch),
            // Inside a comment: discard.
            _ => {}
        }
    }

    // If the input ended while `nesting > 0`, the unmatched '(' swallowed the
    // tail, so nothing further was copied.
    Cow::Owned(kept)
}

| 179 | + |
133 | 180 | /// Parse military timezone with optional hour offset. |
134 | 181 | /// Pattern: single letter (a-z except j) optionally followed by 1-2 digits. |
135 | 182 | /// Returns Some(total_hours_in_utc) or None if pattern doesn't match. |
@@ -286,7 +333,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { |
286 | 333 | // Iterate over all dates - whether it's a single date or a file. |
287 | 334 | let dates: Box<dyn Iterator<Item = _>> = match settings.date_source { |
288 | 335 | DateSource::Human(ref input) => { |
| 336 | + // GNU compatibility (Comments in parentheses) |
| 337 | + let input = strip_parenthesized_comments(input); |
289 | 338 | let input = input.trim(); |
| 339 | + |
290 | 340 | // GNU compatibility (Empty string): |
291 | 341 | // An empty string (or whitespace-only) should be treated as midnight today. |
292 | 342 | let is_empty_or_whitespace = input.is_empty(); |
@@ -885,4 +935,38 @@ mod tests { |
885 | 935 | assert_eq!(parse_military_timezone_with_offset("m999"), None); // Too long |
886 | 936 | assert_eq!(parse_military_timezone_with_offset("9m"), None); // Starts with digit |
887 | 937 | } |
| 938 | + |
| 939 | + #[test] |
| 940 | + fn test_strip_parenthesized_comments() { |
| 941 | + assert_eq!(strip_parenthesized_comments("hello"), "hello"); |
| 942 | + assert_eq!(strip_parenthesized_comments("2026-01-05"), "2026-01-05"); |
| 943 | + assert_eq!(strip_parenthesized_comments("("), ""); |
| 944 | + assert_eq!(strip_parenthesized_comments("1(comment"), "1"); |
| 945 | + assert_eq!( |
| 946 | + strip_parenthesized_comments("2026-01-05(this is a comment"), |
| 947 | + "2026-01-05" |
| 948 | + ); |
| 949 | + assert_eq!( |
| 950 | + strip_parenthesized_comments("2026(comment)-01-05"), |
| 951 | + "2026-01-05" |
| 952 | + ); |
| 953 | + assert_eq!(strip_parenthesized_comments("()"), ""); |
| 954 | + assert_eq!(strip_parenthesized_comments("((foo)2026-01-05)"), ""); |
| 955 | + |
| 956 | + // These cases test the balanced parentheses removal feature |
| 957 | + // which extends beyond what GNU date strictly supports |
| 958 | + assert_eq!(strip_parenthesized_comments("a(b)c"), "ac"); |
| 959 | + assert_eq!(strip_parenthesized_comments("a(b)c(d)e"), "ace"); |
| 960 | + assert_eq!(strip_parenthesized_comments("(a)(b)"), ""); |
| 961 | + |
| 962 | + // When parentheses are unmatched, processing stops at the unmatched opening paren |
| 963 | + // In this case "a(b)c(d", the (b) is balanced but (d is unmatched |
| 964 | + // We process "a(b)c" and stop at the unmatched "(d" |
| 965 | + assert_eq!(strip_parenthesized_comments("a(b)c(d"), "ac"); |
| 966 | + |
| 967 | + // Additional edge cases for nested and complex parentheses |
| 968 | + assert_eq!(strip_parenthesized_comments("a(b(c)d)e"), "ae"); // Nested balanced |
| 969 | + assert_eq!(strip_parenthesized_comments("a(b(c)d"), "a"); // Nested unbalanced |
| 970 | + assert_eq!(strip_parenthesized_comments("a(b)c(d)e(f"), "ace"); // Multiple groups, last unmatched |
| 971 | + } |
888 | 972 | } |
0 commit comments