Skip to content

parse-zoneinfo: replace rule parser with simple state machine #172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
253 changes: 186 additions & 67 deletions parse-zoneinfo/src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ use std::str::FromStr;
use regex::{Captures, Regex};

pub struct LineParser {
rule_line: Regex,
zone_line: Regex,
continuation_line: Regex,
link_line: Regex,
Expand Down Expand Up @@ -131,23 +130,6 @@ impl std::error::Error for Error {}
impl Default for LineParser {
fn default() -> Self {
LineParser {
rule_line: Regex::new(
r##"(?x) ^
Rule \s+
( ?P<name> \S+) \s+
( ?P<from> \S+) \s+
( ?P<to> \S+) \s+
( ?P<type> \S+) \s+
( ?P<in> \S+) \s+
( ?P<on> \S+) \s+
( ?P<at> \S+) \s+
( ?P<save> \S+) \s+
( ?P<letters> \S+) \s*
(\#.*)?
$ "##,
)
.unwrap(),

zone_line: Regex::new(
r##"(?x) ^
Zone \s+
Expand Down Expand Up @@ -877,6 +859,190 @@ pub struct Rule<'a> {
pub letters: Option<&'a str>,
}

impl<'a> Rule<'a> {
fn from_str(input: &'a str) -> Result<Self, Error> {
let mut state = RuleState::Start;
for part in input.split_ascii_whitespace() {
Copy link
Contributor

@pitdicker pitdicker May 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This no longer parses a rule with a comment?

zic.c has a getfields method (line 3722) that returns when it encounters a comment sign #.
It also supports quotation marks " surrounding each field, within which whitespace and # is allowed. Maybe we should make an iterator that works similar instead of using split_ascii_whitespace?

state = match (state, part) {
(RuleState::Start, "Rule") => RuleState::Name,
(RuleState::Name, name) => RuleState::FromYear { name },
(RuleState::FromYear { name }, year) => RuleState::ToYear {
name,
from_year: Year::from_str(year)?,
},
(RuleState::ToYear { name, from_year }, year) => RuleState::Type {
name,
from_year,
to_year: match year {
"only" => None,
_ => Some(Year::from_str(year)?),
},
},
(
RuleState::Type {
name,
from_year,
to_year,
},
"-" | "\u{2010}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add back the comment?

) => RuleState::Month {
name,
from_year,
to_year,
},
(RuleState::Type { .. }, _) => {
return Err(Error::TypeColumnContainedNonHyphen(part.to_string()))
}
(
RuleState::Month {
name,
from_year,
to_year,
},
month,
) => RuleState::Day {
name,
from_year,
to_year,
month: Month::from_str(month)?,
},
(
RuleState::Day {
name,
from_year,
to_year,
month,
},
day,
) => RuleState::Time {
name,
from_year,
to_year,
month,
day: DaySpec::from_str(day)?,
},
(
RuleState::Time {
name,
from_year,
to_year,
month,
day,
},
time,
) => RuleState::TimeToAdd {
name,
from_year,
to_year,
month,
day,
time: TimeSpecAndType::from_str(time)?,
},
(
RuleState::TimeToAdd {
name,
from_year,
to_year,
month,
day,
time,
},
time_to_add,
) => RuleState::Letters {
name,
from_year,
to_year,
month,
day,
time,
time_to_add: TimeSpec::from_str(time_to_add)?,
},
(
RuleState::Letters {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
},
letters,
) => {
return Ok(Self {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
letters: match letters {
"-" => None,
_ => Some(letters),
},
})
}
_ => return Err(Error::NotParsedAsRuleLine),
};
}

Err(Error::NotParsedAsRuleLine)
}
}

/// Parser state used by `Rule::from_str` while walking the
/// whitespace-separated fields of a `Rule` line.
///
/// Each variant is named after the field the parser expects to read *next*
/// and carries the values of every field that has already been parsed.
enum RuleState<'a> {
/// Nothing consumed yet; the next token must be the literal `Rule`.
Start,
/// The `Rule` keyword has been seen; the next token is taken as the name.
Name,
/// The name is known; the next token is parsed as the starting year.
FromYear {
name: &'a str,
},
/// The next token is the end year, or `only` (stored as `None`).
ToYear {
name: &'a str,
from_year: Year,
},
/// The next token is the type column, which must be a hyphen.
Type {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
},
/// The next token is parsed as the month.
Month {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
},
/// The next token is parsed as the day specification.
Day {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
},
/// The next token is parsed as the time (with its type) of the transition.
Time {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
},
/// The next token is parsed as the amount of time to add.
TimeToAdd {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
time: TimeSpecAndType,
},
/// The next token is the letters field; `-` is stored as `None`.
Letters {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
time: TimeSpecAndType,
time_to_add: TimeSpec,
},
}

/// A **zone** definition line.
///
/// According to the `zic(8)` man page, a zone line has this form, along with
Expand Down Expand Up @@ -930,52 +1096,6 @@ impl LineParser {
Self::default()
}

fn parse_rule<'a>(&self, input: &'a str) -> Result<Rule<'a>, Error> {
if let Some(caps) = self.rule_line.captures(input) {
let name = caps.name("name").unwrap().as_str();

let from_year = caps.name("from").unwrap().as_str().parse()?;

// The end year can be ‘only’ to indicate that this rule only
// takes place on that year.
let to_year = match caps.name("to").unwrap().as_str() {
"only" => None,
to => Some(to.parse()?),
};

// According to the spec, the only value inside the ‘type’ column
// should be “-”, so throw an error if it isn’t. (It only exists
// for compatibility with old versions that used to contain year
// types.) Sometimes “‐”, a Unicode hyphen, is used as well.
let t = caps.name("type").unwrap().as_str();
if t != "-" && t != "\u{2010}" {
return Err(Error::TypeColumnContainedNonHyphen(t.to_string()));
}

let month = caps.name("in").unwrap().as_str().parse()?;
let day = DaySpec::from_str(caps.name("on").unwrap().as_str())?;
let time = TimeSpecAndType::from_str(caps.name("at").unwrap().as_str())?;
let time_to_add = TimeSpec::from_str(caps.name("save").unwrap().as_str())?;
let letters = match caps.name("letters").unwrap().as_str() {
"-" => None,
l => Some(l),
};

Ok(Rule {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
letters,
})
} else {
Err(Error::NotParsedAsRuleLine)
}
}

fn saving_from_str<'a>(&self, input: &'a str) -> Result<Saving<'a>, Error> {
if input == "-" {
Ok(Saving::NoSaving)
Expand Down Expand Up @@ -1073,9 +1193,8 @@ impl LineParser {
Some(caps) => return self.zoneinfo_from_captures(caps).map(Line::Continuation),
}

match self.parse_rule(input) {
Err(Error::NotParsedAsRuleLine) => {}
result => return result.map(Line::Rule),
if input.starts_with("Rule") {
return Ok(Line::Rule(Rule::from_str(input)?));
}

match self.parse_link(input) {
Expand Down