Skip to content

Commit 58c5e81

Browse files
committed
Add support for tokenized floats.
1 parent 0c714bb commit 58c5e81

File tree

7 files changed

+647
-59
lines changed

7 files changed

+647
-59
lines changed

extras/simple-bench/src/main.rs

+75-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
108108
/// Float-parsing implementations that the benchmark can run.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Method {
    /// `fast_float::parse_partial` applied to the raw input string.
    FastFloat,
    /// `fast_float::parse_from_parts` applied to input pre-split by `tokenize`.
    FastFloatTokenized,
    /// `lexical_core::parse_partial` applied to the input bytes.
    Lexical,
    /// NOTE(review): presumably the standard library's `FromStr` parser;
    /// its benchmark arm is not visible here — confirm.
    FromStr,
}
@@ -120,10 +121,79 @@ fn type_str(float32: bool) -> &'static str {
120121
}
121122
}
122123

124+
/// Strip one optional leading `+` or `-` from `s`.
///
/// Returns `(negative, rest)`: `negative` is `true` only when the first byte
/// is `-`, and `rest` is `s` without the sign byte (or `s` unchanged when no
/// sign is present).
#[inline]
fn parse_sign<'a>(s: &'a str) -> (bool, &'a str) {
    let negative = match s.as_bytes().first() {
        Some(b'-') => true,
        Some(b'+') => false,
        // Empty input or no sign: hand the string back untouched.
        _ => return (false, s),
    };
    // The sign is a single ASCII byte, so index 1 is a valid char boundary.
    (negative, &s[1..])
}
132+
133+
/// Byte offset of the first `.` in `s`, or `None` when there is none.
#[inline]
fn decimal_index(s: &str) -> Option<usize> {
    // `.` is ASCII, so the byte offset `str::find` reports equals the
    // position of the first matching byte.
    s.find('.')
}
137+
138+
/// Byte offset of the first exponent marker (`e` or `E`) in `s`, or `None`
/// when the string contains neither.
#[inline]
fn exponent_index(s: &str) -> Option<usize> {
    // Both markers are ASCII, so the reported byte offset is the position of
    // the first matching byte.
    s.find(&['e', 'E'][..])
}
142+
143+
/// Split `s` around the single byte at `index`, dropping that byte.
///
/// Returns `(lead, trail)` where `lead` is everything before `index` and
/// `trail` is everything after it.
///
/// # Panics
///
/// Panics if `index >= s.len()`, or if `index` / `index + 1` is not a UTF-8
/// character boundary (i.e. the byte at `index` is not a one-byte character).
#[inline]
fn split_index<'a>(s: &'a str, index: usize) -> (&'a str, &'a str) {
    // Checked slicing keeps this safe function sound: a bad index panics
    // instead of producing a `&str` that is not valid UTF-8.
    (&s[..index], &s[index + 1..])
}
152+
153+
/// Split `s` at its end, yielding `(s, "")`.
///
/// Used when a number has no fractional part so that callers still receive an
/// (empty) fractional slice.
#[inline]
fn split_end<'a>(s: &'a str) -> (&'a str, &'a str) {
    // `s.len()` is always a valid char boundary, so this never panics.
    s.split_at(s.len())
}
161+
162+
/// Parse the exponent text `s` as a signed 64-bit integer.
///
/// # Panics
///
/// Panics (via `unwrap`) when `s` is not a valid `i64`.
#[inline]
fn parse_exponent(s: &str) -> i64 {
    let parsed: Result<i64, _> = s.parse();
    parsed.unwrap()
}
166+
167+
#[inline]
168+
fn tokenize<'a>(s: &'a str) -> (&'a str, &'a str, i64, bool) {
169+
let (negative, s) = parse_sign(s);
170+
if let Some(index) = decimal_index(s) {
171+
let (i, rest) = split_index(s, index);
172+
if let Some(index) = exponent_index(s) {
173+
let (f, exp) = split_index(rest, index);
174+
let exp = parse_exponent(exp);
175+
(i, f, exp, negative)
176+
} else {
177+
(i, rest, 0, negative)
178+
}
179+
} else {
180+
if let Some(index) = exponent_index(s) {
181+
let (i, exp) = split_index(s, index);
182+
let (i, f) = split_end(i);
183+
let exp = parse_exponent(exp);
184+
(i, f, exp, negative)
185+
} else {
186+
let (i, f) = split_end(s);
187+
(i, f, 0, negative)
188+
}
189+
}
190+
}
191+
123192
impl Method {
124193
pub fn name(&self) -> &'static str {
125194
match self {
126195
Self::FastFloat => "fast-float",
196+
Self::FastFloatTokenized => "fast-float-tokenized",
127197
Self::Lexical => "lexical",
128198
Self::FromStr => "from_str",
129199
}
@@ -140,6 +210,10 @@ impl Method {
140210
Self::FastFloat => run_bench(data, repeat, |s: &str| {
141211
fast_float::parse_partial::<T, _>(s).unwrap_or_default().0
142212
}),
213+
Self::FastFloatTokenized => run_bench(data, repeat, |s: &str| {
214+
let (i, f, e, n) = tokenize(s);
215+
fast_float::parse_from_parts::<T, _>(i, f, e, n)
216+
}),
143217
Self::Lexical => run_bench(data, repeat, |s: &str| {
144218
lexical_core::parse_partial::<T>(s.as_bytes())
145219
.unwrap_or_default()
@@ -165,7 +239,7 @@ impl Method {
165239
}
166240

167241
pub fn all() -> &'static [Self] {
168-
&[Method::FastFloat, Method::Lexical, Method::FromStr]
242+
&[Method::FastFloat, Method::FastFloatTokenized, Method::Lexical, Method::FromStr]
169243
}
170244
}
171245

src/decimal.rs

+70-31
Original file line numberDiff line numberDiff line change
@@ -187,41 +187,37 @@ impl Decimal {
187187
}
188188

189189
#[inline]
190-
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
191-
// can't fail since it follows a call to parse_number
192-
let mut d = Decimal::default();
193-
let start = s;
194-
let c = s.get_first();
195-
d.negative = c == b'-';
196-
if c == b'-' || c == b'+' {
197-
s = s.advance(1);
190+
fn parse_fractional<'a>(mut s: &'a [u8], d: &mut Decimal) -> &'a [u8] {
191+
let first = s;
192+
if d.num_digits == 0 {
193+
s = s.skip_chars(b'0');
198194
}
199-
s = s.skip_chars(b'0');
200-
parse_digits(&mut s, |digit| d.try_add_digit(digit));
201-
if s.check_first(b'.') {
202-
s = s.advance(1);
203-
let first = s;
204-
if d.num_digits == 0 {
205-
s = s.skip_chars(b'0');
206-
}
207-
if cfg!(target_endian = "little") {
208-
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
209-
let v = s.read_u64();
210-
if !is_8digits_le(v) {
211-
break;
212-
}
213-
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
214-
d.num_digits += 8;
215-
s = s.advance(8);
195+
if cfg!(target_endian = "little") {
196+
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
197+
let v = s.read_u64();
198+
if !is_8digits_le(v) {
199+
break;
216200
}
201+
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
202+
d.num_digits += 8;
203+
s = s.advance(8);
217204
}
218-
parse_digits(&mut s, |digit| d.try_add_digit(digit));
219-
d.decimal_point = s.len() as i32 - first.len() as i32;
220205
}
206+
parse_digits(&mut s, |digit| d.try_add_digit(digit));
207+
d.decimal_point = s.len() as i32 - first.len() as i32;
208+
209+
s
210+
}
211+
212+
#[inline]
213+
fn trim_zeros<'a, Iter>(iter: Iter, d: &mut Decimal)
214+
where
215+
Iter: Iterator<Item=&'a u8>
216+
{
221217
if d.num_digits != 0 {
222218
// Ignore the trailing zeros if there are any
223219
let mut n_trailing_zeros = 0;
224-
for &c in start[..(start.len() - s.len())].iter().rev() {
220+
for &c in iter {
225221
if c == b'0' {
226222
n_trailing_zeros += 1;
227223
} else if c != b'.' {
@@ -236,6 +232,51 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
236232
d.num_digits = Decimal::MAX_DIGITS;
237233
}
238234
}
235+
}
236+
237+
#[inline]
238+
fn add_zero_digits(d: &mut Decimal) {
239+
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
240+
d.digits[i] = 0;
241+
}
242+
}
243+
244+
#[inline]
245+
pub fn parse_decimal_from_parts(mut i: &[u8], f: &[u8], e: i64, negative: bool) -> Decimal {
246+
// can't fail since it follows a call to parse_number
247+
let mut d = Decimal::default();
248+
249+
// Integral
250+
let i_start = i;
251+
d.negative = negative;
252+
i = i.skip_chars(b'0');
253+
parse_digits(&mut i, |digit| d.try_add_digit(digit));
254+
255+
parse_fractional(f, &mut d);
256+
trim_zeros(i_start.iter().chain(f.iter()).rev(), &mut d);
257+
d.decimal_point += e as i32;
258+
add_zero_digits(&mut d);
259+
260+
d
261+
}
262+
263+
#[inline]
264+
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
265+
// can't fail since it follows a call to parse_number
266+
let mut d = Decimal::default();
267+
let start = s;
268+
let c = s.get_first();
269+
d.negative = c == b'-';
270+
if c == b'-' || c == b'+' {
271+
s = s.advance(1);
272+
}
273+
s = s.skip_chars(b'0');
274+
parse_digits(&mut s, |digit| d.try_add_digit(digit));
275+
if s.check_first(b'.') {
276+
s = s.advance(1);
277+
s = parse_fractional(s, &mut d);
278+
}
279+
trim_zeros(start[..(start.len() - s.len())].iter().rev(), &mut d);
239280
if s.check_first2(b'e', b'E') {
240281
s = s.advance(1);
241282
let mut neg_exp = false;
@@ -253,9 +294,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
253294
});
254295
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
255296
}
256-
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
257-
d.digits[i] = 0;
258-
}
297+
add_zero_digits(&mut d);
259298
d
260299
}
261300

src/lib.rs

+30
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,21 @@ pub trait FastFloat: float::Float {
105105
fn parse_float_partial<S: AsRef<[u8]>>(s: S) -> Result<(Self, usize)> {
106106
parse::parse_float(s.as_ref()).ok_or(Error)
107107
}
108+
109+
/// Parse a pre-tokenized decimal number from string into float.
110+
///
111+
/// This assumes the float has already been tokenized into valid
112+
/// integral and fractional components, and has parsed an optional
113+
/// exponent notation.
114+
///
115+
/// It is up to you to validate and tokenize the input: although
116+
/// this will not error, this might truncate the significant
117+
/// digits as soon as an invalid digit is found. This does not
118+
/// handle special values, such as NaN, INF, or Infinity.
119+
#[inline]
120+
fn parse_from_parts<S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> Self {
121+
parse::parse_from_parts(integral.as_ref(), fractional.as_ref(), exponent, negative)
122+
}
108123
}
109124

110125
impl FastFloat for f32 {}
@@ -134,3 +149,18 @@ pub fn parse<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<T> {
134149
pub fn parse_partial<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<(T, usize)> {
135150
T::parse_float_partial(s)
136151
}
152+
153+
/// Parse a pre-tokenized decimal number from string into float.
154+
///
155+
/// This assumes the float has already been tokenized into valid
156+
/// integral and fractional components, and has parsed an optional
157+
/// exponent notation.
158+
///
159+
/// It is up to you to validate and tokenize the input: although
160+
/// this will not error, this might truncate the significant
161+
/// digits as soon as an invalid digit is found. This does not
162+
/// handle special values, such as NaN, INF, or Infinity.
163+
#[inline]
164+
pub fn parse_from_parts<T: FastFloat, S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> T {
165+
T::parse_from_parts(integral.as_ref(), fractional.as_ref(), exponent, negative)
166+
}

0 commit comments

Comments
 (0)