Skip to content

Commit

Permalink
parse-zoneinfo: replace rule parser with simple state machine
Browse files Browse the repository at this point in the history
  • Loading branch information
djc committed Apr 15, 2024
1 parent 9e62ba1 commit 8c821bb
Showing 1 changed file with 177 additions and 59 deletions.
236 changes: 177 additions & 59 deletions parse-zoneinfo/src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ use std::ascii::AsciiExt;
use regex::{Captures, Regex};

pub struct LineParser {
rule_line: Regex,
day_field: Regex,
hm_field: Regex,
hms_field: Regex,
Expand Down Expand Up @@ -137,23 +136,6 @@ impl std::error::Error for Error {}
impl Default for LineParser {
fn default() -> Self {
LineParser {
rule_line: Regex::new(
r##"(?x) ^
Rule \s+
( ?P<name> \S+) \s+
( ?P<from> \S+) \s+
( ?P<to> \S+) \s+
( ?P<type> \S+) \s+
( ?P<in> \S+) \s+
( ?P<on> \S+) \s+
( ?P<at> \S+) \s+
( ?P<save> \S+) \s+
( ?P<letters> \S+) \s*
(\#.*)?
$ "##,
)
.unwrap(),

day_field: Regex::new(
r##"(?x) ^
( ?P<weekday> \w+ )
Expand Down Expand Up @@ -952,49 +934,133 @@ impl LineParser {
}

fn parse_rule<'a>(&self, input: &'a str) -> Result<Rule<'a>, Error> {
if let Some(caps) = self.rule_line.captures(input) {
let name = caps.name("name").unwrap().as_str();

let from_year = caps.name("from").unwrap().as_str().parse()?;

// The end year can be ‘only’ to indicate that this rule only
// takes place on that year.
let to_year = match caps.name("to").unwrap().as_str() {
"only" => None,
to => Some(to.parse()?),
};

// According to the spec, the only value inside the ‘type’ column
// should be “-”, so throw an error if it isn’t. (It only exists
// for compatibility with old versions that used to contain year
// types.) Sometimes “‐”, a Unicode hyphen, is used as well.
let t = caps.name("type").unwrap().as_str();
if t != "-" && t != "\u{2010}" {
return Err(Error::TypeColumnContainedNonHyphen(t.to_string()));
}

let month = caps.name("in").unwrap().as_str().parse()?;
let day = self.parse_dayspec(caps.name("on").unwrap().as_str())?;
let time = self.parse_timespec_and_type(caps.name("at").unwrap().as_str())?;
let time_to_add = self.parse_timespec(caps.name("save").unwrap().as_str())?;
let letters = match caps.name("letters").unwrap().as_str() {
"-" => None,
l => Some(l),
let mut state = RuleState::Start;
for part in input.split_ascii_whitespace() {
state = match (state, part) {
(RuleState::Start, "Rule") => RuleState::Name,
(RuleState::Name, name) => RuleState::FromYear { name },
(RuleState::FromYear { name }, year) => RuleState::ToYear {
name,
from_year: Year::from_str(year)?,
},
(RuleState::ToYear { name, from_year }, year) => RuleState::Type {
name,
from_year,
to_year: match year {
"only" => None,
_ => Some(Year::from_str(year)?),
},
},
(
RuleState::Type {
name,
from_year,
to_year,
},
"-" | "\u{2010}",
) => RuleState::Month {
name,
from_year,
to_year,
},
(RuleState::Type { .. }, _) => {
return Err(Error::TypeColumnContainedNonHyphen(part.to_string()))
}
(
RuleState::Month {
name,
from_year,
to_year,
},
month,
) => RuleState::Day {
name,
from_year,
to_year,
month: Month::from_str(month)?,
},
(
RuleState::Day {
name,
from_year,
to_year,
month,
},
day,
) => RuleState::Time {
name,
from_year,
to_year,
month,
day: self.parse_dayspec(day)?,
},
(
RuleState::Time {
name,
from_year,
to_year,
month,
day,
},
time,
) => RuleState::TimeToAdd {
name,
from_year,
to_year,
month,
day,
time: self.parse_timespec_and_type(time)?,
},
(
RuleState::TimeToAdd {
name,
from_year,
to_year,
month,
day,
time,
},
time_to_add,
) => RuleState::Letters {
name,
from_year,
to_year,
month,
day,
time,
time_to_add: self.parse_timespec(time_to_add)?,
},
(
RuleState::Letters {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
},
letters,
) => {
return Ok(Rule {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
letters: match letters {
"-" => None,
_ => Some(letters),
},
})
}
_ => return Err(Error::NotParsedAsRuleLine),
};

Ok(Rule {
name,
from_year,
to_year,
month,
day,
time,
time_to_add,
letters,
})
} else {
Err(Error::NotParsedAsRuleLine)
}

Err(Error::NotParsedAsRuleLine)
}

fn saving_from_str<'a>(&self, input: &'a str) -> Result<Saving<'a>, Error> {
Expand Down Expand Up @@ -1109,6 +1175,58 @@ impl LineParser {
}
}

/// Intermediate state of the token-by-token parser in `parse_rule`.
///
/// Each variant is named after the column that is expected *next* and
/// carries the values of the columns that have already been parsed, so a
/// later state can only be built once every earlier column has been read
/// successfully. String fields borrow from the input line (`'a`).
enum RuleState<'a> {
/// Nothing consumed yet; the next token must be the literal `Rule`.
Start,
/// The `Rule` keyword has been seen; the next token is the rule name.
Name,
/// Expecting the first year of the rule.
FromYear {
name: &'a str,
},
/// Expecting the last year of the rule (the token `only` yields `None`).
ToYear {
name: &'a str,
from_year: Year,
},
/// Expecting the type column, which must be `-` or U+2010.
Type {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
},
/// Expecting the month column.
Month {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
},
/// Expecting the day column, parsed with `parse_dayspec`.
Day {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
},
/// Expecting the time column, parsed with `parse_timespec_and_type`.
Time {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
},
/// Expecting the amount of time to add, parsed with `parse_timespec`.
TimeToAdd {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
time: TimeSpecAndType,
},
/// Expecting the letters column (`-` yields `None`); reading it
/// completes the `Rule`.
Letters {
name: &'a str,
from_year: Year,
to_year: Option<Year>,
month: Month,
day: DaySpec,
time: TimeSpecAndType,
time_to_add: TimeSpec,
},
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit 8c821bb

Please sign in to comment.