Ryu-like String to Float Parsing (#138)
A limited implementation for up to 17-digit numbers.

See #129.

* Move some d2s intrinsics to d2s_intrinsics.h

* Rudimentary string-to-double parsing

* Implement more pieces for parsing

- parse exponents
- parse dot
- handle negative exponents
- handle rounding
- add some basic tests for these things

* Fix constants

* Use __builtin_clzll

Thanks to expnkx for finding this.

See #133.

* Allow exponent to start with '+'

* Correctly handle mantissa overflow

* Increase the table size

Denormal numbers require a larger table because the decimal exponent can go down to -326 (we use the table inversely to how the shortest-output algorithm originally uses it).

* Handle most overflow/underflow situations

* Increase table size some more

* More tests close to the underflow boundary

These inputs are almost exactly halfway between 0 and the smallest representable double (see the boundary sketch after this list).

* Implement error handling

Mention the restrictions on the input, and that this implementation is experimental.

* Add another caveat to the documentation

* Make it compile with -DRYU_ONLY_64_BIT_OPS

This mode was not defining mulShift; we just use the same implementation as when the flag is not set.

* Use _BitScanReverse64 on Windows

* Rename mulShift to mulShift{32,64}

Since I moved the mulShift implementations to d2s_intrinsics.h, they conflict with the ones defined in f2s.c. This is only an issue when -DRYU_OPTIMIZE_SIZE is set, but I think it's better to disambiguate anyway.

* Rename max to max32 to avoid name conflicts on MSVC

* MSVC: Always use _BitScanReverse64

The HAS_64_BIT_INTRINSICS macro is false if RYU_ONLY_64_BIT_OPS is set, so using that is incorrect. Instead, only check for _MSC_VER. I can't seem to find a macro to check for existence of __builtin_clzll.
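
For readers landing on this commit, a minimal usage sketch of the new parser. It assumes ryu_parse.h declares an s2d(buffer, result) entry point and a SUCCESS status value (the header itself is not shown in this view), and it uses the C library's strtod, assumed to be correctly rounding, to illustrate the underflow boundary from the notes above:

#include <stdio.h>
#include <stdlib.h>
#include "ryu_parse.h"

int main(void) {
  double value;
  // The implementation is limited to at most 17 significant digits.
  if (s2d("3.1415926535897932", &value) == SUCCESS) {
    printf("parsed: %.17g\n", value);
  }
  // The underflow tests sit around half of 2^-1074 (~4.94e-324), the
  // smallest positive subnormal double: just below half rounds to zero,
  // just above rounds up to 2^-1074 itself.
  printf("%g\n", strtod("2.470328229206232e-324", NULL));   // prints 0
  printf("%g\n", strtod("2.4703282292062328e-324", NULL));  // prints 4.94066e-324
  return 0;
}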
ulfjack authored Dec 4, 2019
1 parent 2978dfe commit d4114ce
Showing 11 changed files with 592 additions and 141 deletions.
11 changes: 11 additions & 0 deletions ryu/BUILD
@@ -17,6 +17,17 @@ cc_library(
hdrs = ["ryu.h"],
)

cc_library(
name = "ryu_parse",
srcs = [
"s2d.c",
"d2s_intrinsics.h",
"d2s_full_table.h",
"common.h",
],
hdrs = ["ryu_parse.h"],
)

cc_library(
name = "generic_128",
srcs = [
12 changes: 11 additions & 1 deletion ryu/common.h
@@ -42,6 +42,16 @@ static inline uint32_t decimalLength9(const uint32_t v) {
return 1;
}

// Returns floor(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t log2pow5(const int32_t e) {
// This approximation works up to the point that the multiplication overflows at e = 3529.
// If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
// than 2^9297.
assert(e >= 0);
assert(e <= 3528);
return (int32_t) ((((uint32_t) e) * 1217359) >> 19);
}
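
The constant is a 19-bit fixed-point approximation of log_2(5): 1217359 / 2^19 ≈ 2.32192798, slightly below log_2(5) ≈ 2.32192809, yet close enough that the truncated product still equals floor(e * log_2(5)) across the supported range. And since 5^e is never a power of two for e >= 1, the ceiling variants below can simply add one. A quick spot-check sketch (not part of this header):

#include <assert.h>
#include <math.h>
#include <stdint.h>

int main(void) {
  for (int32_t e = 1; e <= 1000; ++e) {
    const int32_t fast = (int32_t) ((((uint32_t) e) * 1217359) >> 19);
    // Double arithmetic is a safe reference here: e * log2(5) never comes
    // within double rounding error of an integer for e this small.
    const int32_t ref = (int32_t) floor(e * log2(5.0));
    assert(fast == ref);
  }
  return 0;
}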

// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t pow5bits(const int32_t e) {
// This approximation works up to the point that the multiplication overflows at e = 3529.
@@ -54,7 +64,7 @@ static inline int32_t pow5bits(const int32_t e) {

// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t ceil_log2pow5(const int32_t e) {
return pow5bits(e);
return log2pow5(e) + 1;
}

// Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
132 changes: 4 additions & 128 deletions ryu/d2s.c
@@ -55,130 +55,6 @@
#define DOUBLE_EXPONENT_BITS 11
#define DOUBLE_BIAS 1023

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
// The 64-bit factor is variable and passed in, the 128-bit factor comes
// from a lookup table. We know that the 64-bit factor only has 55
// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
// factor only has 124 significant bits (i.e., the 4 topmost bits are
// zeros).
// Shift:
// In principle, the multiplication result requires 55 + 124 = 179 bits to
// represent. However, we then shift this value to the right by j, which is
// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
// bits. This means that we only need the topmost 64 significant bits of
// the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
// We perform two 64x64-bit multiplications, add the higher 64 bits of the
// lower result to the higher result, and shift by j - 64 bits.
//
// We explicitly cast from 64-bit to 128-bit, so the compiler can tell
// that these are only 64-bit inputs, and can map these to the best
// possible sequence of assembly instructions.
// x64 machines happen to have matching assembly instructions for
// 64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
// instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
// the result, i.e., we have to use plain C.
// Our inputs are less than the full width, so we have three options:
// a. Ignore this fact and just implement the intrinsics manually.
// b. Split both into 31-bit pieces, which guarantees no internal overflow,
// but requires extra work upfront (unless we change the lookup table).
// c. Split only the first factor into 31-bit pieces, which also guarantees
// no internal overflow, but requires extra work since the intermediate
// results are not perfectly aligned.
#if defined(HAS_UINT128)

// Best case: use 128-bit type.
static inline uint64_t mulShift(const uint64_t m, const uint64_t* const mul, const int32_t j) {
const uint128_t b0 = ((uint128_t) m) * mul[0];
const uint128_t b2 = ((uint128_t) m) * mul[1];
return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

static inline uint64_t mulShiftAll(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
// m <<= 2;
// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
//
// uint128_t hi = (b0 >> 64) + b2;
// uint128_t lo = b0 & 0xffffffffffffffffull;
// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
// uint128_t vpLo = lo + (factor << 1);
// *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
// uint128_t vmLo = lo - (factor << mmShift);
// *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
// return (uint64_t) (hi >> (j - 64));
*vp = mulShift(4 * m + 2, mul, j);
*vm = mulShift(4 * m - 1 - mmShift, mul, j);
return mulShift(4 * m, mul, j);
}

#elif defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

static inline uint64_t mulShiftAll(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
*vp = mulShift(4 * m + 2, mul, j);
*vm = mulShift(4 * m - 1 - mmShift, mul, j);
return mulShift(4 * m, mul, j);
}

#else // !defined(HAS_UINT128) && !defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShiftAll(uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
m <<= 1;
// m is maximum 55 bits
uint64_t tmp;
const uint64_t lo = umul128(m, mul[0], &tmp);
uint64_t hi;
const uint64_t mid = tmp + umul128(m, mul[1], &hi);
hi += mid < tmp; // overflow into hi

const uint64_t lo2 = lo + mul[0];
const uint64_t mid2 = mid + mul[1] + (lo2 < lo);
const uint64_t hi2 = hi + (mid2 < mid);
*vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1));

if (mmShift == 1) {
const uint64_t lo3 = lo - mul[0];
const uint64_t mid3 = mid - mul[1] - (lo3 > lo);
const uint64_t hi3 = hi - (mid3 > mid);
*vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1));
} else {
const uint64_t lo3 = lo + lo;
const uint64_t mid3 = mid + mid + (lo3 < lo);
const uint64_t hi3 = hi + hi + (mid3 < mid);
const uint64_t lo4 = lo3 - mul[0];
const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
const uint64_t hi4 = hi3 - (mid4 > mid3);
*vm = shiftright128(mid4, hi4, (uint32_t) (j - 64));
}

return shiftright128(mid, hi, (uint32_t) (j - 64 - 1));
}

#endif // HAS_64_BIT_INTRINSICS

static inline uint32_t decimalLength17(const uint64_t v) {
// This is slightly faster than a loop.
// The average output length is 16.38 digits, so we check high-to-low.
@@ -253,9 +129,9 @@ static inline floating_decimal_64 d2d(const uint64_t ieeeMantissa, const uint32_
#if defined(RYU_OPTIMIZE_SIZE)
uint64_t pow5[2];
double_computeInvPow5(q, pow5);
vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, pow5, i, &vp, &vm, mmShift);
#else
vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
#endif
#ifdef RYU_DEBUG
printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);
@@ -288,9 +164,9 @@ static inline floating_decimal_64 d2d(const uint64_t ieeeMantissa, const uint32_
#if defined(RYU_OPTIMIZE_SIZE)
uint64_t pow5[2];
double_computePow5(i, pow5);
vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, pow5, j, &vp, &vm, mmShift);
#else
vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
#endif
#ifdef RYU_DEBUG
printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);
34 changes: 31 additions & 3 deletions ryu/d2s_full_table.h
@@ -21,7 +21,10 @@
#define DOUBLE_POW5_INV_BITCOUNT 125
#define DOUBLE_POW5_BITCOUNT 125

static const uint64_t DOUBLE_POW5_INV_SPLIT[292][2] = {
#define DOUBLE_POW5_INV_TABLE_SIZE 342
#define DOUBLE_POW5_TABLE_SIZE 326

static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = {
{ 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u },
{ 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u },
{ 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u },
@@ -167,10 +170,35 @@ static const uint64_t DOUBLE_POW5_INV_SPLIT[292][2] = {
{ 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u },
{ 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u },
{ 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u },
{ 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }
{ 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u },
{ 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u },
{ 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u },
{ 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u },
{ 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u },
{ 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u },
{ 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u },
{ 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u },
{ 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u },
{ 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u },
{ 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u },
{ 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u },
{ 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u },
{ 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u },
{ 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u },
{ 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u },
{ 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u },
{ 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u },
{ 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u },
{ 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u },
{ 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u },
{ 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u },
{ 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u },
{ 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u },
{ 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u },
{ 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u }
};

static const uint64_t DOUBLE_POW5_SPLIT[326][2] = {
static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = {
{ 0u, 1152921504606846976u }, { 0u, 1441151880758558720u },
{ 0u, 1801439850948198400u }, { 0u, 2251799813685248000u },
{ 0u, 1407374883553280000u }, { 0u, 1759218604441600000u },
138 changes: 138 additions & 0 deletions ryu/d2s_intrinsics.h
@@ -219,4 +219,142 @@ static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
return (value & ((1ull << p) - 1)) == 0;
}
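
As a quick aside, the mask trick in multipleOfPowerOf2 above says a value is divisible by 2^p exactly when its low p bits are zero; a hypothetical standalone demo:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_multiple_of_pow2(const uint64_t value, const uint32_t p) {
  return (value & ((1ull << p) - 1)) == 0;  // low p bits must all be zero
}

int main(void) {
  assert(is_multiple_of_pow2(40, 3));   // 40 = 5 * 2^3
  assert(!is_multiple_of_pow2(40, 4));  // 40 is not a multiple of 16
  return 0;
}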

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
// The 64-bit factor is variable and passed in, the 128-bit factor comes
// from a lookup table. We know that the 64-bit factor only has 55
// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
// factor only has 124 significant bits (i.e., the 4 topmost bits are
// zeros).
// Shift:
// In principle, the multiplication result requires 55 + 124 = 179 bits to
// represent. However, we then shift this value to the right by j, which is
// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
// bits. This means that we only need the topmost 64 significant bits of
// the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
// We perform two 64x64-bit multiplications, add the higher 64 bits of the
// lower result to the higher result, and shift by j - 64 bits.
//
// We explicitly cast from 64-bit to 128-bit, so the compiler can tell
// that these are only 64-bit inputs, and can map these to the best
// possible sequence of assembly instructions.
// x64 machines happen to have matching assembly instructions for
// 64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
// instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
// the result, i.e., we have to use plain C.
// Our inputs are less than the full width, so we have three options:
// a. Ignore this fact and just implement the intrinsics manually.
// b. Split both into 31-bit pieces, which guarantees no internal overflow,
// but requires extra work upfront (unless we change the lookup table).
// c. Split only the first factor into 31-bit pieces, which also guarantees
// no internal overflow, but requires extra work since the intermediate
// results are not perfectly aligned.
#if defined(HAS_UINT128)

// Best case: use 128-bit type.
static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
const uint128_t b0 = ((uint128_t) m) * mul[0];
const uint128_t b2 = ((uint128_t) m) * mul[1];
return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
// m <<= 2;
// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
//
// uint128_t hi = (b0 >> 64) + b2;
// uint128_t lo = b0 & 0xffffffffffffffffull;
// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
// uint128_t vpLo = lo + (factor << 1);
// *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
// uint128_t vmLo = lo - (factor << mmShift);
// *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
// return (uint64_t) (hi >> (j - 64));
*vp = mulShift64(4 * m + 2, mul, j);
*vm = mulShift64(4 * m - 1 - mmShift, mul, j);
return mulShift64(4 * m, mul, j);
}

#elif defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
*vp = mulShift64(4 * m + 2, mul, j);
*vm = mulShift64(4 * m - 1 - mmShift, mul, j);
return mulShift64(4 * m, mul, j);
}

#else // !defined(HAS_UINT128) && !defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

// This is faster if we don't have a 64x64->128-bit multiplication.
static inline uint64_t mulShiftAll64(uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
m <<= 1;
// m is maximum 55 bits
uint64_t tmp;
const uint64_t lo = umul128(m, mul[0], &tmp);
uint64_t hi;
const uint64_t mid = tmp + umul128(m, mul[1], &hi);
hi += mid < tmp; // overflow into hi

const uint64_t lo2 = lo + mul[0];
const uint64_t mid2 = mid + mul[1] + (lo2 < lo);
const uint64_t hi2 = hi + (mid2 < mid);
*vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1));

if (mmShift == 1) {
const uint64_t lo3 = lo - mul[0];
const uint64_t mid3 = mid - mul[1] - (lo3 > lo);
const uint64_t hi3 = hi - (mid3 > mid);
*vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1));
} else {
const uint64_t lo3 = lo + lo;
const uint64_t mid3 = mid + mid + (lo3 < lo);
const uint64_t hi3 = hi + hi + (mid3 < mid);
const uint64_t lo4 = lo3 - mul[0];
const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
const uint64_t hi4 = hi3 - (mid4 > mid3);
*vm = shiftright128(mid4, hi4, (uint32_t) (j - 64));
}

return shiftright128(mid, hi, (uint32_t) (j - 64 - 1));
}

#endif // HAS_64_BIT_INTRINSICS

#endif // RYU_D2S_INTRINSICS_H
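
The three mulShift64 paths above must agree bit for bit. A self-contained sketch cross-checking the manual carry logic of the plain-C path against a direct 128-bit computation, assuming a compiler that provides unsigned __int128 (all names here are local stand-ins, not the header's):

#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 u128;

// Direct 128-bit reference, mirroring the HAS_UINT128 branch.
static uint64_t mul_shift_ref(uint64_t m, const uint64_t* mul, int32_t j) {
  const u128 b0 = ((u128) m) * mul[0];
  const u128 b2 = ((u128) m) * mul[1];
  return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

// Two-limb computation with an explicit carry, mirroring the portable branch.
static uint64_t mul_shift_limbs(uint64_t m, const uint64_t* mul, int32_t j) {
  const u128 p1 = ((u128) m) * mul[1];
  const u128 p0 = ((u128) m) * mul[0];
  uint64_t high1 = (uint64_t) (p1 >> 64);
  const uint64_t low1 = (uint64_t) p1;
  const uint64_t high0 = (uint64_t) (p0 >> 64);
  const uint64_t sum = high0 + low1;
  if (sum < high0) {
    ++high1;  // carry into the top limb
  }
  return (uint64_t) (((((u128) high1) << 64) | sum) >> (j - 64));
}

int main(void) {
  // One 128-bit table-style entry, taken from DOUBLE_POW5_INV_SPLIT above.
  const uint64_t mul[2] = { 11068046444225730970ull, 1844674407370955161ull };
  for (uint64_t m = 1; m < (1ull << 55); m += (1ull << 41) + 987654321ull) {
    assert(mul_shift_ref(m, mul, 115) == mul_shift_limbs(m, mul, 115));
  }
  return 0;
}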
