From e31ae739d3d700216b20c598bd022d505945e151 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 14:28:40 +0200 Subject: [PATCH 1/6] Deny dead code --- chrono-tz/src/directory.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/chrono-tz/src/directory.rs b/chrono-tz/src/directory.rs index 5741dfc..025afc4 100644 --- a/chrono-tz/src/directory.rs +++ b/chrono-tz/src/directory.rs @@ -1,5 +1,4 @@ #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(non_upper_case_globals)] -#![allow(dead_code)] include!(concat!(env!("OUT_DIR"), "/directory.rs")); From 130e88c34cf440aa9e200d1ddc24f800289dd21e Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 18:28:49 +0200 Subject: [PATCH 2/6] Complete FIXME related to phf --- chrono-tz-build/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/chrono-tz-build/src/lib.rs b/chrono-tz-build/src/lib.rs index 0557219..52cacac 100644 --- a/chrono-tz-build/src/lib.rs +++ b/chrono-tz-build/src/lib.rs @@ -112,8 +112,7 @@ fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<() writeln!( timezone_file, "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};", - // FIXME(petrosagg): remove this once rust-phf/rust-phf#232 is released - map.build().to_string().replace("::std::mem::transmute", "::core::mem::transmute") + map.build() )?; } From 43a9611a5270cc72aedf3149b7d2334b383afb3f Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 15:02:41 +0200 Subject: [PATCH 3/6] Test type size --- chrono-tz/src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/chrono-tz/src/lib.rs b/chrono-tz/src/lib.rs index 5393fe8..c919270 100644 --- a/chrono-tz/src/lib.rs +++ b/chrono-tz/src/lib.rs @@ -163,7 +163,8 @@ mod tests { use super::IANA_TZDB_VERSION; use super::US::Eastern; use super::UTC; - use chrono::{Duration, NaiveDate, TimeZone}; + use crate::TzOffset; + use chrono::{DateTime, Duration, NaiveDate, TimeZone}; #[test] fn london_to_berlin() { @@ -412,4 +413,10 @@ mod tests { assert_eq!(4, numbers.len()); assert!(IANA_TZDB_VERSION.ends_with(|c: char| c.is_ascii_lowercase())); } + + #[test] + fn test_type_size() { + assert_eq!(core::mem::size_of::(), 32); + assert_eq!(core::mem::size_of::>(), 48); + } } From e8d41a51465fc603a4e17c8e6a85af0bbdc04ecc Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 19:34:44 +0200 Subject: [PATCH 4/6] Store abbreviations in a single string, compactly encode index and len --- chrono-tz-build/src/lib.rs | 32 +++++++++++++++++++++++++------- chrono-tz/src/lib.rs | 4 ++-- chrono-tz/src/timezone_impl.rs | 21 +++++++++++++++------ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/chrono-tz-build/src/lib.rs b/chrono-tz-build/src/lib.rs index 52cacac..e514bb3 100644 --- a/chrono-tz-build/src/lib.rs +++ b/chrono-tz-build/src/lib.rs @@ -2,7 +2,7 @@ extern crate parse_zoneinfo; #[cfg(feature = "filter-by-regex")] extern crate regex; -use std::collections::BTreeSet; +use std::collections::{BTreeSet, HashSet}; use std::env; use std::fs::File; use std::io::{self, BufRead, BufReader, Write}; @@ -30,17 +30,17 @@ fn strip_comments(mut line: String) -> String { // Generate a list of the time zone periods beyond the first that apply // to this zone, as a string representation of a static slice. -fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String { +fn format_rest(rest: Vec<(i64, FixedTimespan)>, abbreviations: &str) -> String { let mut ret = "&[\n".to_string(); for (start, FixedTimespan { utc_offset, dst_offset, name }) in rest { ret.push_str(&format!( " ({start}, FixedTimespan {{ \ - utc_offset: {utc}, dst_offset: {dst}, name: \"{name}\" \ + utc_offset: {utc}, dst_offset: {dst}, abbreviation: {index_len} \ }}),\n", start = start, utc = utc_offset, dst = dst_offset, - name = name, + index_len = (abbreviations.find(&name).unwrap() << 3) | name.len(), )); } ret.push_str(" ]"); @@ -68,12 +68,29 @@ fn convert_bad_chars(name: &str) -> String { // TimeZone for any contained struct that implements `Timespans`. fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> { let zones = table.zonesets.keys().chain(table.links.keys()).collect::>(); + + // Collect all unique abbreviations into a HashSet, sort, and concatenate into a string. + let mut abbreviations = HashSet::new(); + for zone in &zones { + let timespans = table.timespans(zone).unwrap(); + for (_, timespan) in timespans.rest.into_iter().chain(Some((0, timespans.first))) { + abbreviations.insert(timespan.name.clone()); + } + } + let mut abbreviations: Vec<_> = abbreviations.iter().collect(); + abbreviations.sort(); + let mut abbreviations_str = String::new(); + for abbr in abbreviations.drain(..) { + abbreviations_str.push_str(abbr) + } + writeln!(timezone_file, "use core::fmt::{{self, Debug, Display, Formatter}};",)?; writeln!(timezone_file, "use core::str::FromStr;\n",)?; writeln!( timezone_file, "use crate::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n", )?; + writeln!(timezone_file, "pub(crate) const ABBREVIATIONS: &str = \"{}\";\n", abbreviations_str)?; writeln!( timezone_file, "/// TimeZones built at compile time from the tz database @@ -208,16 +225,17 @@ impl FromStr for Tz {{ first: FixedTimespan {{ utc_offset: {utc}, dst_offset: {dst}, - name: \"{name}\", + abbreviation: {index_len}, }}, rest: REST }} }},\n", zone = zone_name, - rest = format_rest(timespans.rest), + rest = format_rest(timespans.rest, &abbreviations_str), utc = timespans.first.utc_offset, dst = timespans.first.dst_offset, - name = timespans.first.name, + index_len = (abbreviations_str.find(×pans.first.name).unwrap() << 3) + | timespans.first.name.len(), )?; } write!( diff --git a/chrono-tz/src/lib.rs b/chrono-tz/src/lib.rs index c919270..ad95af9 100644 --- a/chrono-tz/src/lib.rs +++ b/chrono-tz/src/lib.rs @@ -416,7 +416,7 @@ mod tests { #[test] fn test_type_size() { - assert_eq!(core::mem::size_of::(), 32); - assert_eq!(core::mem::size_of::>(), 48); + assert_eq!(core::mem::size_of::(), 16); + assert_eq!(core::mem::size_of::>(), 28); } } diff --git a/chrono-tz/src/timezone_impl.rs b/chrono-tz/src/timezone_impl.rs index 4c1e3f4..1d3b730 100644 --- a/chrono-tz/src/timezone_impl.rs +++ b/chrono-tz/src/timezone_impl.rs @@ -6,7 +6,7 @@ use chrono::{ }; use crate::binary_search::binary_search; -use crate::timezones::Tz; +use crate::timezones::{Tz, ABBREVIATIONS}; /// Returns [`Tz::UTC`]. impl Default for Tz { @@ -25,8 +25,17 @@ pub struct FixedTimespan { pub utc_offset: i32, /// The additional offset from UTC for this timespan; typically for daylight saving time pub dst_offset: i32, - /// The name of this timezone, for example the difference between `EDT`/`EST` - pub name: &'static str, + /// The abbreviation of this offset, for example the difference between `EDT`/`EST`. + /// Stored as a slice of the `ABBREVIATIONS` static as `index << 3 | len`. + pub(crate) abbreviation: i16, +} + +impl FixedTimespan { + fn abbreviation(&self) -> &'static str { + let index = (self.abbreviation >> 3) as usize; + let len = (self.abbreviation & 0b111) as usize; + &ABBREVIATIONS[index..index + len] + } } impl Offset for FixedTimespan { @@ -37,13 +46,13 @@ impl Offset for FixedTimespan { impl Display for FixedTimespan { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.name) + write!(f, "{}", self.abbreviation()) } } impl Debug for FixedTimespan { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.name) + write!(f, "{}", self.abbreviation()) } } @@ -156,7 +165,7 @@ impl OffsetName for TzOffset { } fn abbreviation(&self) -> &str { - self.offset.name + self.offset.abbreviation() } } From 74a458862fd212d56e7b0ae46c25ba6a0b2fcae9 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 19:39:21 +0200 Subject: [PATCH 5/6] Merge offsets into one `i32` --- chrono-tz-build/src/lib.rs | 17 +++++------------ chrono-tz/src/lib.rs | 4 ++-- chrono-tz/src/timezone_impl.rs | 35 ++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/chrono-tz-build/src/lib.rs b/chrono-tz-build/src/lib.rs index e514bb3..a1272c6 100644 --- a/chrono-tz-build/src/lib.rs +++ b/chrono-tz-build/src/lib.rs @@ -34,12 +34,9 @@ fn format_rest(rest: Vec<(i64, FixedTimespan)>, abbreviations: &str) -> String { let mut ret = "&[\n".to_string(); for (start, FixedTimespan { utc_offset, dst_offset, name }) in rest { ret.push_str(&format!( - " ({start}, FixedTimespan {{ \ - utc_offset: {utc}, dst_offset: {dst}, abbreviation: {index_len} \ - }}),\n", + " ({start}, FixedTimespan {{ offset: {offset}, abbreviation: {index_len} }}),\n", start = start, - utc = utc_offset, - dst = dst_offset, + offset = utc_offset << 14 | (dst_offset & ((1 << 14) - 1)), index_len = (abbreviations.find(&name).unwrap() << 3) | name.len(), )); } @@ -222,18 +219,14 @@ impl FromStr for Tz {{ " Tz::{zone} => {{ const REST: &[(i64, FixedTimespan)] = {rest}; FixedTimespanSet {{ - first: FixedTimespan {{ - utc_offset: {utc}, - dst_offset: {dst}, - abbreviation: {index_len}, - }}, + first: FixedTimespan {{ offset: {offset}, abbreviation: {index_len} }}, rest: REST }} }},\n", zone = zone_name, rest = format_rest(timespans.rest, &abbreviations_str), - utc = timespans.first.utc_offset, - dst = timespans.first.dst_offset, + offset = + timespans.first.utc_offset << 14 | (timespans.first.dst_offset & ((1 << 14) - 1)), index_len = (abbreviations_str.find(×pans.first.name).unwrap() << 3) | timespans.first.name.len(), )?; diff --git a/chrono-tz/src/lib.rs b/chrono-tz/src/lib.rs index ad95af9..9131aef 100644 --- a/chrono-tz/src/lib.rs +++ b/chrono-tz/src/lib.rs @@ -416,7 +416,7 @@ mod tests { #[test] fn test_type_size() { - assert_eq!(core::mem::size_of::(), 16); - assert_eq!(core::mem::size_of::>(), 28); + assert_eq!(core::mem::size_of::(), 12); + assert_eq!(core::mem::size_of::>(), 24); } } diff --git a/chrono-tz/src/timezone_impl.rs b/chrono-tz/src/timezone_impl.rs index 1d3b730..8589311 100644 --- a/chrono-tz/src/timezone_impl.rs +++ b/chrono-tz/src/timezone_impl.rs @@ -20,11 +20,10 @@ impl Default for Tz { /// For example, [`::US::Eastern`] is composed of at least two /// `FixedTimespan`s: `EST` and `EDT`, that are variously in effect. #[derive(Copy, Clone, PartialEq, Eq)] -pub struct FixedTimespan { - /// The base offset from UTC; this usually doesn't change unless the government changes something - pub utc_offset: i32, - /// The additional offset from UTC for this timespan; typically for daylight saving time - pub dst_offset: i32, +pub(crate) struct FixedTimespan { + /// We encode two values in the offset: the base offset from utc and the extra offset due to + /// DST. It is encoded as `(utc_offset << 14) | (dst_offset & ((1 << 14) - 1))`. + pub(crate) offset: i32, /// The abbreviation of this offset, for example the difference between `EDT`/`EST`. /// Stored as a slice of the `ABBREVIATIONS` static as `index << 3 | len`. pub(crate) abbreviation: i16, @@ -36,11 +35,19 @@ impl FixedTimespan { let len = (self.abbreviation & 0b111) as usize; &ABBREVIATIONS[index..index + len] } + + fn base_offset(&self) -> i32 { + self.offset >> 14 + } + + fn saving(&self) -> i32 { + (self.offset << 18) >> 18 + } } impl Offset for FixedTimespan { fn fix(&self) -> FixedOffset { - FixedOffset::east_opt(self.utc_offset + self.dst_offset).unwrap() + FixedOffset::east_opt(self.base_offset() + self.saving()).unwrap() } } @@ -151,11 +158,11 @@ impl TzOffset { impl OffsetComponents for TzOffset { fn base_utc_offset(&self) -> Duration { - Duration::seconds(self.offset.utc_offset as i64) + Duration::seconds(self.offset.base_offset() as i64) } fn dst_offset(&self) -> Duration { - Duration::seconds(self.offset.dst_offset as i64) + Duration::seconds(self.offset.saving() as i64) } } @@ -252,21 +259,17 @@ impl FixedTimespanSet { None } else { let span = self.rest[index - 1]; - Some(span.0 + span.1.utc_offset as i64 + span.1.dst_offset as i64) + Some(span.0 + (span.1.base_offset() + span.1.saving()) as i64) }, end: if index == self.rest.len() { None } else if index == 0 { - Some( - self.rest[index].0 - + self.first.utc_offset as i64 - + self.first.dst_offset as i64, - ) + Some(self.rest[index].0 + (self.first.base_offset() + self.first.saving()) as i64) } else { Some( self.rest[index].0 - + self.rest[index - 1].1.utc_offset as i64 - + self.rest[index - 1].1.dst_offset as i64, + + self.rest[index - 1].1.base_offset() as i64 + + self.rest[index - 1].1.saving() as i64, ) }, } From 7c47b180d21ea785eb4d4f9cd3d6ded372099f53 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 12 Apr 2024 19:41:44 +0200 Subject: [PATCH 6/6] Don't wrap `FixedTimespan` in `TzOffset` --- chrono-tz/src/lib.rs | 4 +-- chrono-tz/src/timezone_impl.rs | 47 +++++++++++----------------------- 2 files changed, 17 insertions(+), 34 deletions(-) diff --git a/chrono-tz/src/lib.rs b/chrono-tz/src/lib.rs index 9131aef..593a773 100644 --- a/chrono-tz/src/lib.rs +++ b/chrono-tz/src/lib.rs @@ -416,7 +416,7 @@ mod tests { #[test] fn test_type_size() { - assert_eq!(core::mem::size_of::(), 12); - assert_eq!(core::mem::size_of::>(), 24); + assert_eq!(core::mem::size_of::(), 8); + assert_eq!(core::mem::size_of::>(), 20); } } diff --git a/chrono-tz/src/timezone_impl.rs b/chrono-tz/src/timezone_impl.rs index 8589311..123727c 100644 --- a/chrono-tz/src/timezone_impl.rs +++ b/chrono-tz/src/timezone_impl.rs @@ -30,12 +30,6 @@ pub(crate) struct FixedTimespan { } impl FixedTimespan { - fn abbreviation(&self) -> &'static str { - let index = (self.abbreviation >> 3) as usize; - let len = (self.abbreviation & 0b111) as usize; - &ABBREVIATIONS[index..index + len] - } - fn base_offset(&self) -> i32 { self.offset >> 14 } @@ -45,28 +39,15 @@ impl FixedTimespan { } } -impl Offset for FixedTimespan { - fn fix(&self) -> FixedOffset { - FixedOffset::east_opt(self.base_offset() + self.saving()).unwrap() - } -} - -impl Display for FixedTimespan { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.abbreviation()) - } -} - -impl Debug for FixedTimespan { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.abbreviation()) - } -} - #[derive(Copy, Clone, PartialEq, Eq)] pub struct TzOffset { tz: Tz, - offset: FixedTimespan, + /// We encode two values in the offset: the base offset from utc and the extra offset due to + /// DST. It is encoded as `(utc_offset << 14) | (saving & ((1 << 14) - 1))`. + offset: i32, + /// The abbreviation of this offset, for example the difference between `EDT`/`EST`. + /// Stored as a slice of the `ABBREVIATIONS` static as `index << 3 | len`. + abbreviation: i16, } /// Detailed timezone offset components that expose any special conditions currently in effect. @@ -142,7 +123,7 @@ pub trait OffsetName { impl TzOffset { fn new(tz: Tz, offset: FixedTimespan) -> Self { - TzOffset { tz, offset } + TzOffset { tz, offset: offset.offset, abbreviation: offset.abbreviation } } fn map_localresult(tz: Tz, result: LocalResult) -> LocalResult { @@ -158,11 +139,11 @@ impl TzOffset { impl OffsetComponents for TzOffset { fn base_utc_offset(&self) -> Duration { - Duration::seconds(self.offset.base_offset() as i64) + Duration::seconds((self.offset >> 14) as i64) } fn dst_offset(&self) -> Duration { - Duration::seconds(self.offset.saving() as i64) + Duration::seconds(((self.offset << 18) >> 18) as i64) } } @@ -172,25 +153,27 @@ impl OffsetName for TzOffset { } fn abbreviation(&self) -> &str { - self.offset.abbreviation() + let index = (self.abbreviation >> 3) as usize; + let len = (self.abbreviation & 0b111) as usize; + &ABBREVIATIONS[index..index + len] } } impl Offset for TzOffset { fn fix(&self) -> FixedOffset { - self.offset.fix() + FixedOffset::east_opt((self.offset >> 14) + ((self.offset << 18) >> 18)).unwrap() } } impl Display for TzOffset { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - Display::fmt(&self.offset, f) + f.write_str(self.abbreviation()) } } impl Debug for TzOffset { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - Debug::fmt(&self.offset, f) + f.write_str(self.abbreviation()) } }