Skip to content

Commit 8c821bb

Browse files
committed
parse-zoneinfo: replace rule parser with simple state machine
1 parent 9e62ba1 commit 8c821bb

File tree

1 file changed

+177
-59
lines changed

1 file changed

+177
-59
lines changed

Diff for: parse-zoneinfo/src/line.rs

+177 -59
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@ use std::ascii::AsciiExt;
7777
use regex::{Captures, Regex};
7878

7979
pub struct LineParser {
80-
rule_line: Regex,
8180
day_field: Regex,
8281
hm_field: Regex,
8382
hms_field: Regex,
@@ -137,23 +136,6 @@ impl std::error::Error for Error {}
137136
impl Default for LineParser {
138137
fn default() -> Self {
139138
LineParser {
140-
rule_line: Regex::new(
141-
r##"(?x) ^
142-
Rule \s+
143-
( ?P<name> \S+) \s+
144-
( ?P<from> \S+) \s+
145-
( ?P<to> \S+) \s+
146-
( ?P<type> \S+) \s+
147-
( ?P<in> \S+) \s+
148-
( ?P<on> \S+) \s+
149-
( ?P<at> \S+) \s+
150-
( ?P<save> \S+) \s+
151-
( ?P<letters> \S+) \s*
152-
(\#.*)?
153-
$ "##,
154-
)
155-
.unwrap(),
156-
157139
day_field: Regex::new(
158140
r##"(?x) ^
159141
( ?P<weekday> \w+ )
@@ -952,49 +934,133 @@ impl LineParser {
952934
}
953935

954936
fn parse_rule<'a>(&self, input: &'a str) -> Result<Rule<'a>, Error> {
955-
if let Some(caps) = self.rule_line.captures(input) {
956-
let name = caps.name("name").unwrap().as_str();
957-
958-
let from_year = caps.name("from").unwrap().as_str().parse()?;
959-
960-
// The end year can be ‘only’ to indicate that this rule only
961-
// takes place on that year.
962-
let to_year = match caps.name("to").unwrap().as_str() {
963-
"only" => None,
964-
to => Some(to.parse()?),
965-
};
966-
967-
// According to the spec, the only value inside the ‘type’ column
968-
// should be “-”, so throw an error if it isn’t. (It only exists
969-
// for compatibility with old versions that used to contain year
970-
// types.) Sometimes “‐”, a Unicode hyphen, is used as well.
971-
let t = caps.name("type").unwrap().as_str();
972-
if t != "-" && t != "\u{2010}" {
973-
return Err(Error::TypeColumnContainedNonHyphen(t.to_string()));
974-
}
975-
976-
let month = caps.name("in").unwrap().as_str().parse()?;
977-
let day = self.parse_dayspec(caps.name("on").unwrap().as_str())?;
978-
let time = self.parse_timespec_and_type(caps.name("at").unwrap().as_str())?;
979-
let time_to_add = self.parse_timespec(caps.name("save").unwrap().as_str())?;
980-
let letters = match caps.name("letters").unwrap().as_str() {
981-
"-" => None,
982-
l => Some(l),
937+
let mut state = RuleState::Start;
938+
for part in input.split_ascii_whitespace() {
939+
state = match (state, part) {
940+
(RuleState::Start, "Rule") => RuleState::Name,
941+
(RuleState::Name, name) => RuleState::FromYear { name },
942+
(RuleState::FromYear { name }, year) => RuleState::ToYear {
943+
name,
944+
from_year: Year::from_str(year)?,
945+
},
946+
(RuleState::ToYear { name, from_year }, year) => RuleState::Type {
947+
name,
948+
from_year,
949+
to_year: match year {
950+
"only" => None,
951+
_ => Some(Year::from_str(year)?),
952+
},
953+
},
954+
(
955+
RuleState::Type {
956+
name,
957+
from_year,
958+
to_year,
959+
},
960+
"-" | "\u{2010}",
961+
) => RuleState::Month {
962+
name,
963+
from_year,
964+
to_year,
965+
},
966+
(RuleState::Type { .. }, _) => {
967+
return Err(Error::TypeColumnContainedNonHyphen(part.to_string()))
968+
}
969+
(
970+
RuleState::Month {
971+
name,
972+
from_year,
973+
to_year,
974+
},
975+
month,
976+
) => RuleState::Day {
977+
name,
978+
from_year,
979+
to_year,
980+
month: Month::from_str(month)?,
981+
},
982+
(
983+
RuleState::Day {
984+
name,
985+
from_year,
986+
to_year,
987+
month,
988+
},
989+
day,
990+
) => RuleState::Time {
991+
name,
992+
from_year,
993+
to_year,
994+
month,
995+
day: self.parse_dayspec(day)?,
996+
},
997+
(
998+
RuleState::Time {
999+
name,
1000+
from_year,
1001+
to_year,
1002+
month,
1003+
day,
1004+
},
1005+
time,
1006+
) => RuleState::TimeToAdd {
1007+
name,
1008+
from_year,
1009+
to_year,
1010+
month,
1011+
day,
1012+
time: self.parse_timespec_and_type(time)?,
1013+
},
1014+
(
1015+
RuleState::TimeToAdd {
1016+
name,
1017+
from_year,
1018+
to_year,
1019+
month,
1020+
day,
1021+
time,
1022+
},
1023+
time_to_add,
1024+
) => RuleState::Letters {
1025+
name,
1026+
from_year,
1027+
to_year,
1028+
month,
1029+
day,
1030+
time,
1031+
time_to_add: self.parse_timespec(time_to_add)?,
1032+
},
1033+
(
1034+
RuleState::Letters {
1035+
name,
1036+
from_year,
1037+
to_year,
1038+
month,
1039+
day,
1040+
time,
1041+
time_to_add,
1042+
},
1043+
letters,
1044+
) => {
1045+
return Ok(Rule {
1046+
name,
1047+
from_year,
1048+
to_year,
1049+
month,
1050+
day,
1051+
time,
1052+
time_to_add,
1053+
letters: match letters {
1054+
"-" => None,
1055+
_ => Some(letters),
1056+
},
1057+
})
1058+
}
1059+
_ => return Err(Error::NotParsedAsRuleLine),
9831060
};
984-
985-
Ok(Rule {
986-
name,
987-
from_year,
988-
to_year,
989-
month,
990-
day,
991-
time,
992-
time_to_add,
993-
letters,
994-
})
995-
} else {
996-
Err(Error::NotParsedAsRuleLine)
9971061
}
1062+
1063+
Err(Error::NotParsedAsRuleLine)
9981064
}
9991065

10001066
fn saving_from_str<'a>(&self, input: &'a str) -> Result<Saving<'a>, Error> {
@@ -1109,6 +1175,58 @@ impl LineParser {
11091175
}
11101176
}
11111177

1178+
enum RuleState<'a> {
1179+
Start,
1180+
Name,
1181+
FromYear {
1182+
name: &'a str,
1183+
},
1184+
ToYear {
1185+
name: &'a str,
1186+
from_year: Year,
1187+
},
1188+
Type {
1189+
name: &'a str,
1190+
from_year: Year,
1191+
to_year: Option<Year>,
1192+
},
1193+
Month {
1194+
name: &'a str,
1195+
from_year: Year,
1196+
to_year: Option<Year>,
1197+
},
1198+
Day {
1199+
name: &'a str,
1200+
from_year: Year,
1201+
to_year: Option<Year>,
1202+
month: Month,
1203+
},
1204+
Time {
1205+
name: &'a str,
1206+
from_year: Year,
1207+
to_year: Option<Year>,
1208+
month: Month,
1209+
day: DaySpec,
1210+
},
1211+
TimeToAdd {
1212+
name: &'a str,
1213+
from_year: Year,
1214+
to_year: Option<Year>,
1215+
month: Month,
1216+
day: DaySpec,
1217+
time: TimeSpecAndType,
1218+
},
1219+
Letters {
1220+
name: &'a str,
1221+
from_year: Year,
1222+
to_year: Option<Year>,
1223+
month: Month,
1224+
day: DaySpec,
1225+
time: TimeSpecAndType,
1226+
time_to_add: TimeSpec,
1227+
},
1228+
}
1229+
11121230
#[cfg(test)]
11131231
mod tests {
11141232
use super::*;

0 commit comments

Comments
 (0)