Skip to content

Commit 6b05270

Browse files
committed
Add support for tokenized floats.
1 parent 0c714bb commit 6b05270

File tree

8 files changed

+669
-59
lines changed

8 files changed

+669
-59
lines changed

extras/simple-bench/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ anyhow = "1.0"
1414
lexical = "5.2"
1515
lexical-core = "0.7"
1616
fastrand = "1.4"
17+
18+
[features]
19+
default = []
20+
use_tokenized = []

extras/simple-bench/src/main.rs

+93-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
108108
/// Float-parsing backends that the benchmark can exercise.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Method {
    /// `fast_float`, parsing directly from the input string.
    FastFloat,
    /// `fast_float`, fed with parts produced by `tokenize`; only compiled
    /// when the `use_tokenized` feature is enabled.
    #[cfg(feature = "use_tokenized")]
    FastFloatTokenized,
    /// The `lexical` parser.
    Lexical,
    /// Reported as `from_str` (presumably the standard library's `str::parse`;
    /// the implementation is not visible from here).
    FromStr,
}
@@ -120,10 +122,87 @@ fn type_str(float32: bool) -> &'static str {
120122
}
121123
}
122124

125+
/// Splits an optional leading sign off `s`.
///
/// Returns `(negative, rest)`. `negative` is `true` only for a leading `-`;
/// a leading `+` is consumed as well. Without a sign, `s` is returned as is.
#[inline]
#[cfg(feature = "use_tokenized")]
fn parse_sign<'a>(s: &'a str) -> (bool, &'a str) {
    let negative = s.starts_with('-');
    let has_sign = negative || s.starts_with('+');
    // Both sign characters are one byte long, so skipping one byte is enough.
    let rest = if has_sign { &s[1..] } else { s };
    (negative, rest)
}
134+
135+
/// Returns the byte offset of the first `.` in `s`, or `None` if there is none.
#[inline]
#[cfg(feature = "use_tokenized")]
fn decimal_index(s: &str) -> Option<usize> {
    s.find('.')
}
140+
141+
/// Returns the byte offset of the first exponent marker (`e` or `E`) in `s`,
/// or `None` if there is none.
#[inline]
#[cfg(feature = "use_tokenized")]
fn exponent_index(s: &str) -> Option<usize> {
    s.bytes().position(|byte| byte.eq_ignore_ascii_case(&b'e'))
}
146+
147+
/// Splits `s` around the single-byte separator located at byte offset `index`.
///
/// Returns the text before `index` and the text after it; the byte at
/// `index` itself (e.g. `.` or `e`) is dropped.
///
/// # Panics
///
/// Panics if `index >= s.len()`, or if `index` or `index + 1` does not fall
/// on a UTF-8 character boundary.
#[inline]
#[cfg(feature = "use_tokenized")]
fn split_index<'a>(s: &'a str, index: usize) -> (&'a str, &'a str) {
    // Checked `str` slicing instead of splitting the bytes and calling
    // `from_utf8_unchecked`: a split point inside a multi-byte character now
    // panics rather than producing an invalid `&str` from a safe function.
    (&s[..index], &s[index + 1..])
}
157+
158+
/// Splits `s` at its very end, yielding `(s, "")`.
///
/// Used when a number has no fractional part, so the fraction is the empty
/// tail of the integral digits.
#[inline]
#[cfg(feature = "use_tokenized")]
fn split_end<'a>(s: &'a str) -> (&'a str, &'a str) {
    // `s.len()` is always a valid character boundary, so this cannot panic.
    s.split_at(s.len())
}
167+
168+
/// Parses the exponent digits (with optional sign) as an `i64`.
///
/// # Panics
///
/// Panics if `s` is not a valid `i64` (for example, when it is empty).
#[inline]
#[cfg(feature = "use_tokenized")]
fn parse_exponent(s: &str) -> i64 {
    let parsed: Result<i64, _> = s.parse();
    parsed.unwrap()
}
173+
174+
/// Tokenizes a float literal into `(integral, fractional, exponent, negative)`.
///
/// * `integral` is the digits before the `.` (or before the exponent marker
///   when there is no `.`).
/// * `fractional` is the digits between the `.` and the exponent marker;
///   empty when the input has no `.`.
/// * `exponent` is the parsed exponent, or `0` when none is present.
/// * `negative` is `true` when the input starts with `-`.
///
/// No validation is performed on the digit runs themselves.
///
/// # Panics
///
/// Panics if an exponent marker is present but the text after it is not a
/// valid `i64`.
#[inline]
#[cfg(feature = "use_tokenized")]
fn tokenize<'a>(s: &'a str) -> (&'a str, &'a str, i64, bool) {
    let (negative, s) = parse_sign(s);
    if let Some(index) = decimal_index(s) {
        let (i, rest) = split_index(s, index);
        // The exponent marker must be located within `rest`: an offset taken
        // from `s` is shifted by the integral digits and the `.`, and would
        // split at the wrong place or panic (e.g. on "1.5e3").
        if let Some(index) = exponent_index(rest) {
            let (f, exp) = split_index(rest, index);
            (i, f, parse_exponent(exp), negative)
        } else {
            (i, rest, 0, negative)
        }
    } else if let Some(index) = exponent_index(s) {
        let (i, exp) = split_index(s, index);
        // No `.`: the fraction is the empty tail of the integral digits.
        let (i, f) = split_end(i);
        (i, f, parse_exponent(exp), negative)
    } else {
        let (i, f) = split_end(s);
        (i, f, 0, negative)
    }
}
199+
123200
impl Method {
124201
pub fn name(&self) -> &'static str {
125202
match self {
126203
Self::FastFloat => "fast-float",
204+
#[cfg(feature = "use_tokenized")]
205+
Self::FastFloatTokenized => "fast-float-tokenized",
127206
Self::Lexical => "lexical",
128207
Self::FromStr => "from_str",
129208
}
@@ -140,6 +219,11 @@ impl Method {
140219
Self::FastFloat => run_bench(data, repeat, |s: &str| {
141220
fast_float::parse_partial::<T, _>(s).unwrap_or_default().0
142221
}),
222+
#[cfg(feature = "use_tokenized")]
223+
Self::FastFloatTokenized => run_bench(data, repeat, |s: &str| {
224+
let (i, f, e, n) = tokenize(s);
225+
fast_float::parse_from_parts::<T, _>(i, f, e, n)
226+
}),
143227
Self::Lexical => run_bench(data, repeat, |s: &str| {
144228
lexical_core::parse_partial::<T>(s.as_bytes())
145229
.unwrap_or_default()
@@ -165,7 +249,15 @@ impl Method {
165249
}
166250

167251
pub fn all() -> &'static [Self] {
168-
&[Method::FastFloat, Method::Lexical, Method::FromStr]
252+
#[cfg(feature = "use_tokenized")]
253+
{
254+
&[Method::FastFloat, Method::FastFloatTokenized, Method::Lexical, Method::FromStr]
255+
}
256+
257+
#[cfg(not(feature = "use_tokenized"))]
258+
{
259+
&[Method::FastFloat, Method::Lexical, Method::FromStr]
260+
}
169261
}
170262
}
171263

src/decimal.rs

+70-31
Original file line numberDiff line numberDiff line change
@@ -187,41 +187,37 @@ impl Decimal {
187187
}
188188

189189
#[inline]
190-
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
191-
// can't fail since it follows a call to parse_number
192-
let mut d = Decimal::default();
193-
let start = s;
194-
let c = s.get_first();
195-
d.negative = c == b'-';
196-
if c == b'-' || c == b'+' {
197-
s = s.advance(1);
190+
fn parse_fractional<'a>(mut s: &'a [u8], d: &mut Decimal) -> &'a [u8] {
191+
let first = s;
192+
if d.num_digits == 0 {
193+
s = s.skip_chars(b'0');
198194
}
199-
s = s.skip_chars(b'0');
200-
parse_digits(&mut s, |digit| d.try_add_digit(digit));
201-
if s.check_first(b'.') {
202-
s = s.advance(1);
203-
let first = s;
204-
if d.num_digits == 0 {
205-
s = s.skip_chars(b'0');
206-
}
207-
if cfg!(target_endian = "little") {
208-
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
209-
let v = s.read_u64();
210-
if !is_8digits_le(v) {
211-
break;
212-
}
213-
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
214-
d.num_digits += 8;
215-
s = s.advance(8);
195+
if cfg!(target_endian = "little") {
196+
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
197+
let v = s.read_u64();
198+
if !is_8digits_le(v) {
199+
break;
216200
}
201+
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
202+
d.num_digits += 8;
203+
s = s.advance(8);
217204
}
218-
parse_digits(&mut s, |digit| d.try_add_digit(digit));
219-
d.decimal_point = s.len() as i32 - first.len() as i32;
220205
}
206+
parse_digits(&mut s, |digit| d.try_add_digit(digit));
207+
d.decimal_point = s.len() as i32 - first.len() as i32;
208+
209+
s
210+
}
211+
212+
#[inline]
213+
fn trim_zeros<'a, Iter>(iter: Iter, d: &mut Decimal)
214+
where
215+
Iter: Iterator<Item=&'a u8>
216+
{
221217
if d.num_digits != 0 {
222218
// Ignore the trailing zeros if there are any
223219
let mut n_trailing_zeros = 0;
224-
for &c in start[..(start.len() - s.len())].iter().rev() {
220+
for &c in iter {
225221
if c == b'0' {
226222
n_trailing_zeros += 1;
227223
} else if c != b'.' {
@@ -236,6 +232,51 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
236232
d.num_digits = Decimal::MAX_DIGITS;
237233
}
238234
}
235+
}
236+
237+
#[inline]
238+
fn add_zero_digits(d: &mut Decimal) {
239+
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
240+
d.digits[i] = 0;
241+
}
242+
}
243+
244+
#[inline]
245+
pub fn parse_decimal_from_parts(mut i: &[u8], f: &[u8], e: i64, negative: bool) -> Decimal {
246+
// can't fail since it follows a call to parse_number
247+
let mut d = Decimal::default();
248+
249+
// Integral
250+
let i_start = i;
251+
d.negative = negative;
252+
i = i.skip_chars(b'0');
253+
parse_digits(&mut i, |digit| d.try_add_digit(digit));
254+
255+
parse_fractional(f, &mut d);
256+
trim_zeros(i_start.iter().chain(f.iter()).rev(), &mut d);
257+
d.decimal_point += e as i32;
258+
add_zero_digits(&mut d);
259+
260+
d
261+
}
262+
263+
#[inline]
264+
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
265+
// can't fail since it follows a call to parse_number
266+
let mut d = Decimal::default();
267+
let start = s;
268+
let c = s.get_first();
269+
d.negative = c == b'-';
270+
if c == b'-' || c == b'+' {
271+
s = s.advance(1);
272+
}
273+
s = s.skip_chars(b'0');
274+
parse_digits(&mut s, |digit| d.try_add_digit(digit));
275+
if s.check_first(b'.') {
276+
s = s.advance(1);
277+
s = parse_fractional(s, &mut d);
278+
}
279+
trim_zeros(start[..(start.len() - s.len())].iter().rev(), &mut d);
239280
if s.check_first2(b'e', b'E') {
240281
s = s.advance(1);
241282
let mut neg_exp = false;
@@ -253,9 +294,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
253294
});
254295
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
255296
}
256-
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
257-
d.digits[i] = 0;
258-
}
297+
add_zero_digits(&mut d);
259298
d
260299
}
261300

src/lib.rs

+30
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,21 @@ pub trait FastFloat: float::Float {
105105
fn parse_float_partial<S: AsRef<[u8]>>(s: S) -> Result<(Self, usize)> {
106106
parse::parse_float(s.as_ref()).ok_or(Error)
107107
}
108+
109+
/// Parse a pre-tokenized decimal number from string into float.
110+
///
111+
/// This assumes the float has already been tokenized into valid
112+
/// integral and fractional components, and has parsed an optional
113+
/// exponent notation.
114+
///
115+
/// It is up to you to validate and tokenize the input: although
116+
/// this will not error, this might truncate the significant
117+
/// digits as soon as an invalid digit is found. This does not
118+
/// handle special values, such as NaN, INF, or Infinity.
119+
#[inline]
120+
fn parse_from_parts<S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> Self {
121+
parse::parse_from_parts(integral.as_ref(), fractional.as_ref(), exponent, negative)
122+
}
108123
}
109124

110125
impl FastFloat for f32 {}
@@ -134,3 +149,18 @@ pub fn parse<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<T> {
134149
pub fn parse_partial<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<(T, usize)> {
135150
T::parse_float_partial(s)
136151
}
152+
153+
/// Parse a pre-tokenized decimal number from string into float.
154+
///
155+
/// This assumes the float has already been tokenized into valid
156+
/// integral and fractional components, and has parsed an optional
157+
/// exponent notation.
158+
///
159+
/// It is up to you to validate and tokenize the input: although
160+
/// this will not error, this might truncate the significant
161+
/// digits as soon as an invalid digit is found. This does not
162+
/// handle special values, such as NaN, INF, or Infinity.
163+
#[inline]
164+
pub fn parse_from_parts<T: FastFloat, S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> T {
165+
T::parse_from_parts(integral.as_ref(), fractional.as_ref(), exponent, negative)
166+
}

0 commit comments

Comments
 (0)