Ryu-like String to Float Parsing (#138)
A limited implementation for up to 17-digit numbers.

See #129.

* Move some d2s intrinsics to d2s_intrinsics.h

* Rudimentary string-to-double parsing

* Implement more pieces for parsing

- parse exponents
- parse dot
- handle negative exponents
- handle rounding
- add some basic tests for these things

* Fix constants

* Use __builtin_clzll

Thanks to expnkx for finding this.

See #133.

* Allow exponent to start with '+'

* Correctly handle mantissa overflow

* Increase the table size

Denormal numbers require a larger table because the decimal exponent can go down to -326 (we use the table inversely to how the shortest-output algorithm originally uses it).

* Handle most overflow/underflow situations

* Increase table size some more

* More tests close to the underflow boundary

These inputs are almost exactly halfway between 0 and the smallest representable double (see the boundary sketch after this list).

* Implement error handling

Mention the restrictions on the input, and that this implementation is experimental.

* Add another caveat to the documentation

* Make it compile with -DRYU_ONLY_64_BIT_OPS

This mode was not defining mulShift; we just use the same implementation as when the flag is not set.

* Use _BitScanReverse64 on Windows

* Rename mulShift to mulShift{32,64}

Since I moved the mulShift implementations to d2s_intrinsics.h, they conflict with the ones defined in f2s.c. This is only an issue when -DRYU_OPTIMIZE_SIZE is set, but I think it's better to disambiguate anyway.

* Rename max to max32 to avoid name conflicts on MSVC

* MSVC: Always use _BitScanReverse64

The HAS_64_BIT_INTRINSICS macro is false if RYU_ONLY_64_BIT_OPS is set, so using that is incorrect. Instead, only check for _MSC_VER. I can't seem to find a macro to check for existence of __builtin_clzll.
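
For readers landing on this commit, a minimal usage sketch of the new parser. It assumes ryu_parse.h declares an s2d(buffer, result) entry point and a SUCCESS status value (the header itself is not shown in this view), and it uses the C library's strtod, assumed to be correctly rounding, to illustrate the underflow boundary from the notes above:

#include <stdio.h>
#include <stdlib.h>
#include "ryu_parse.h"

int main(void) {
  double value;
  // The implementation is limited to at most 17 significant digits.
  if (s2d("3.1415926535897932", &value) == SUCCESS) {
    printf("parsed: %.17g\n", value);
  }
  // The underflow tests sit around half of 2^-1074 (~4.94e-324), the
  // smallest positive subnormal double: just below half rounds to zero,
  // just above rounds up to 2^-1074 itself.
  printf("%g\n", strtod("2.470328229206232e-324", NULL));   // prints 0
  printf("%g\n", strtod("2.4703282292062328e-324", NULL));  // prints 4.94066e-324
  return 0;
}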
ulfjack authored Dec 4, 2019
1 parent 2978dfe commit d4114ce
Showing 11 changed files with 592 additions and 141 deletions.
11 changes: 11 additions & 0 deletions ryu/BUILD
@@ -17,6 +17,17 @@ cc_library(
hdrs = ["ryu.h"],
)

cc_library(
name = "ryu_parse",
srcs = [
"s2d.c",
"d2s_intrinsics.h",
"d2s_full_table.h",
"common.h",
],
hdrs = ["ryu_parse.h"],
)

cc_library(
name = "generic_128",
srcs = [
12 changes: 11 additions & 1 deletion ryu/common.h
@@ -42,6 +42,16 @@ static inline uint32_t decimalLength9(const uint32_t v) {
return 1;
}

// Returns floor(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t log2pow5(const int32_t e) {
// This approximation works up to the point that the multiplication overflows at e = 3529.
// If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
// than 2^9297.
assert(e >= 0);
assert(e <= 3528);
return (int32_t) ((((uint32_t) e) * 1217359) >> 19);
}
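
The constant is a 19-bit fixed-point approximation of log_2(5): 1217359 / 2^19 ≈ 2.32192798, slightly below log_2(5) ≈ 2.32192809, yet close enough that the truncated product still equals floor(e * log_2(5)) across the supported range. And since 5^e is never a power of two for e >= 1, the ceiling variants below can simply add one. A quick spot-check sketch (not part of this header):

#include <assert.h>
#include <math.h>
#include <stdint.h>

int main(void) {
  for (int32_t e = 1; e <= 1000; ++e) {
    const int32_t fast = (int32_t) ((((uint32_t) e) * 1217359) >> 19);
    // Double arithmetic is a safe reference here: e * log2(5) never comes
    // within double rounding error of an integer for e this small.
    const int32_t ref = (int32_t) floor(e * log2(5.0));
    assert(fast == ref);
  }
  return 0;
}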

// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t pow5bits(const int32_t e) {
// This approximation works up to the point that the multiplication overflows at e = 3529.
@@ -54,7 +64,7 @@ static inline int32_t pow5bits(const int32_t e) {

// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
static inline int32_t ceil_log2pow5(const int32_t e) {
return pow5bits(e);
return log2pow5(e) + 1;
}

// Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
132 changes: 4 additions & 128 deletions ryu/d2s.c
@@ -55,130 +55,6 @@
#define DOUBLE_EXPONENT_BITS 11
#define DOUBLE_BIAS 1023

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
// The 64-bit factor is variable and passed in, the 128-bit factor comes
// from a lookup table. We know that the 64-bit factor only has 55
// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
// factor only has 124 significant bits (i.e., the 4 topmost bits are
// zeros).
// Shift:
// In principle, the multiplication result requires 55 + 124 = 179 bits to
// represent. However, we then shift this value to the right by j, which is
// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
// bits. This means that we only need the topmost 64 significant bits of
// the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
// We perform two 64x64-bit multiplications, add the higher 64 bits of the
// lower result to the higher result, and shift by j - 64 bits.
//
// We explicitly cast from 64-bit to 128-bit, so the compiler can tell
// that these are only 64-bit inputs, and can map these to the best
// possible sequence of assembly instructions.
// x64 machines happen to have matching assembly instructions for
// 64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
// instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
// the result, i.e., we have to use plain C.
// Our inputs are less than the full width, so we have three options:
// a. Ignore this fact and just implement the intrinsics manually.
// b. Split both into 31-bit pieces, which guarantees no internal overflow,
// but requires extra work upfront (unless we change the lookup table).
// c. Split only the first factor into 31-bit pieces, which also guarantees
// no internal overflow, but requires extra work since the intermediate
// results are not perfectly aligned.
#if defined(HAS_UINT128)

// Best case: use 128-bit type.
static inline uint64_t mulShift(const uint64_t m, const uint64_t* const mul, const int32_t j) {
const uint128_t b0 = ((uint128_t) m) * mul[0];
const uint128_t b2 = ((uint128_t) m) * mul[1];
return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

static inline uint64_t mulShiftAll(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
// m <<= 2;
// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
//
// uint128_t hi = (b0 >> 64) + b2;
// uint128_t lo = b0 & 0xffffffffffffffffull;
// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
// uint128_t vpLo = lo + (factor << 1);
// *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
// uint128_t vmLo = lo - (factor << mmShift);
// *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
// return (uint64_t) (hi >> (j - 64));
*vp = mulShift(4 * m + 2, mul, j);
*vm = mulShift(4 * m - 1 - mmShift, mul, j);
return mulShift(4 * m, mul, j);
}

#elif defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

static inline uint64_t mulShiftAll(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
*vp = mulShift(4 * m + 2, mul, j);
*vm = mulShift(4 * m - 1 - mmShift, mul, j);
return mulShift(4 * m, mul, j);
}

#else // !defined(HAS_UINT128) && !defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShiftAll(uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
m <<= 1;
// m is maximum 55 bits
uint64_t tmp;
const uint64_t lo = umul128(m, mul[0], &tmp);
uint64_t hi;
const uint64_t mid = tmp + umul128(m, mul[1], &hi);
hi += mid < tmp; // overflow into hi

const uint64_t lo2 = lo + mul[0];
const uint64_t mid2 = mid + mul[1] + (lo2 < lo);
const uint64_t hi2 = hi + (mid2 < mid);
*vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1));

if (mmShift == 1) {
const uint64_t lo3 = lo - mul[0];
const uint64_t mid3 = mid - mul[1] - (lo3 > lo);
const uint64_t hi3 = hi - (mid3 > mid);
*vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1));
} else {
const uint64_t lo3 = lo + lo;
const uint64_t mid3 = mid + mid + (lo3 < lo);
const uint64_t hi3 = hi + hi + (mid3 < mid);
const uint64_t lo4 = lo3 - mul[0];
const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
const uint64_t hi4 = hi3 - (mid4 > mid3);
*vm = shiftright128(mid4, hi4, (uint32_t) (j - 64));
}

return shiftright128(mid, hi, (uint32_t) (j - 64 - 1));
}

#endif // HAS_64_BIT_INTRINSICS

static inline uint32_t decimalLength17(const uint64_t v) {
// This is slightly faster than a loop.
// The average output length is 16.38 digits, so we check high-to-low.
@@ -253,9 +129,9 @@ static inline floating_decimal_64 d2d(const uint64_t ieeeMantissa, const uint32_
#if defined(RYU_OPTIMIZE_SIZE)
uint64_t pow5[2];
double_computeInvPow5(q, pow5);
vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, pow5, i, &vp, &vm, mmShift);
#else
vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
#endif
#ifdef RYU_DEBUG
printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);
@@ -288,9 +164,9 @@ static inline floating_decimal_64 d2d(const uint64_t ieeeMantissa, const uint32_
#if defined(RYU_OPTIMIZE_SIZE)
uint64_t pow5[2];
double_computePow5(i, pow5);
vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, pow5, j, &vp, &vm, mmShift);
#else
vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
vr = mulShiftAll64(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
#endif
#ifdef RYU_DEBUG
printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);
34 changes: 31 additions & 3 deletions ryu/d2s_full_table.h
@@ -21,7 +21,10 @@
#define DOUBLE_POW5_INV_BITCOUNT 125
#define DOUBLE_POW5_BITCOUNT 125

static const uint64_t DOUBLE_POW5_INV_SPLIT[292][2] = {
#define DOUBLE_POW5_INV_TABLE_SIZE 342
#define DOUBLE_POW5_TABLE_SIZE 326

static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = {
{ 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u },
{ 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u },
{ 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u },
@@ -167,10 +170,35 @@ static const uint64_t DOUBLE_POW5_INV_SPLIT[292][2] = {
{ 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u },
{ 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u },
{ 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u },
{ 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }
{ 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u },
{ 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u },
{ 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u },
{ 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u },
{ 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u },
{ 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u },
{ 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u },
{ 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u },
{ 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u },
{ 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u },
{ 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u },
{ 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u },
{ 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u },
{ 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u },
{ 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u },
{ 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u },
{ 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u },
{ 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u },
{ 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u },
{ 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u },
{ 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u },
{ 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u },
{ 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u },
{ 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u },
{ 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u },
{ 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u }
};

static const uint64_t DOUBLE_POW5_SPLIT[326][2] = {
static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = {
{ 0u, 1152921504606846976u }, { 0u, 1441151880758558720u },
{ 0u, 1801439850948198400u }, { 0u, 2251799813685248000u },
{ 0u, 1407374883553280000u }, { 0u, 1759218604441600000u },
138 changes: 138 additions & 0 deletions ryu/d2s_intrinsics.h
@@ -219,4 +219,142 @@ static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
return (value & ((1ull << p) - 1)) == 0;
}
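
As a quick aside, the mask trick in multipleOfPowerOf2 above says a value is divisible by 2^p exactly when its low p bits are zero; a hypothetical standalone demo:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_multiple_of_pow2(const uint64_t value, const uint32_t p) {
  return (value & ((1ull << p) - 1)) == 0;  // low p bits must all be zero
}

int main(void) {
  assert(is_multiple_of_pow2(40, 3));   // 40 = 5 * 2^3
  assert(!is_multiple_of_pow2(40, 4));  // 40 is not a multiple of 16
  return 0;
}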

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
// The 64-bit factor is variable and passed in, the 128-bit factor comes
// from a lookup table. We know that the 64-bit factor only has 55
// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
// factor only has 124 significant bits (i.e., the 4 topmost bits are
// zeros).
// Shift:
// In principle, the multiplication result requires 55 + 124 = 179 bits to
// represent. However, we then shift this value to the right by j, which is
// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
// bits. This means that we only need the topmost 64 significant bits of
// the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
// We perform two 64x64-bit multiplications, add the higher 64 bits of the
// lower result to the higher result, and shift by j - 64 bits.
//
// We explicitly cast from 64-bit to 128-bit, so the compiler can tell
// that these are only 64-bit inputs, and can map these to the best
// possible sequence of assembly instructions.
// x64 machines happen to have matching assembly instructions for
// 64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
// instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
// the result, i.e., we have to use plain C.
// Our inputs are less than the full width, so we have three options:
// a. Ignore this fact and just implement the intrinsics manually.
// b. Split both into 31-bit pieces, which guarantees no internal overflow,
// but requires extra work upfront (unless we change the lookup table).
// c. Split only the first factor into 31-bit pieces, which also guarantees
// no internal overflow, but requires extra work since the intermediate
// results are not perfectly aligned.
#if defined(HAS_UINT128)

// Best case: use 128-bit type.
static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
const uint128_t b0 = ((uint128_t) m) * mul[0];
const uint128_t b2 = ((uint128_t) m) * mul[1];
return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
// m <<= 2;
// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
//
// uint128_t hi = (b0 >> 64) + b2;
// uint128_t lo = b0 & 0xffffffffffffffffull;
// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
// uint128_t vpLo = lo + (factor << 1);
// *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
// uint128_t vmLo = lo - (factor << mmShift);
// *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
// return (uint64_t) (hi >> (j - 64));
*vp = mulShift64(4 * m + 2, mul, j);
*vm = mulShift64(4 * m - 1 - mmShift, mul, j);
return mulShift64(4 * m, mul, j);
}

#elif defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
*vp = mulShift64(4 * m + 2, mul, j);
*vm = mulShift64(4 * m - 1 - mmShift, mul, j);
return mulShift64(4 * m, mul, j);
}

#else // !defined(HAS_UINT128) && !defined(HAS_64_BIT_INTRINSICS)

static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
// m is maximum 55 bits
uint64_t high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64
umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1;
if (sum < high0) {
++high1; // overflow into high1
}
return shiftright128(sum, high1, j - 64);
}

// This is faster if we don't have a 64x64->128-bit multiplication.
static inline uint64_t mulShiftAll64(uint64_t m, const uint64_t* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
m <<= 1;
// m is maximum 55 bits
uint64_t tmp;
const uint64_t lo = umul128(m, mul[0], &tmp);
uint64_t hi;
const uint64_t mid = tmp + umul128(m, mul[1], &hi);
hi += mid < tmp; // overflow into hi

const uint64_t lo2 = lo + mul[0];
const uint64_t mid2 = mid + mul[1] + (lo2 < lo);
const uint64_t hi2 = hi + (mid2 < mid);
*vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1));

if (mmShift == 1) {
const uint64_t lo3 = lo - mul[0];
const uint64_t mid3 = mid - mul[1] - (lo3 > lo);
const uint64_t hi3 = hi - (mid3 > mid);
*vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1));
} else {
const uint64_t lo3 = lo + lo;
const uint64_t mid3 = mid + mid + (lo3 < lo);
const uint64_t hi3 = hi + hi + (mid3 < mid);
const uint64_t lo4 = lo3 - mul[0];
const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
const uint64_t hi4 = hi3 - (mid4 > mid3);
*vm = shiftright128(mid4, hi4, (uint32_t) (j - 64));
}

return shiftright128(mid, hi, (uint32_t) (j - 64 - 1));
}

#endif // HAS_64_BIT_INTRINSICS

#endif // RYU_D2S_INTRINSICS_H
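
The three mulShift64 paths above must agree bit for bit. A self-contained sketch cross-checking the manual carry logic of the plain-C path against a direct 128-bit computation, assuming a compiler that provides unsigned __int128 (all names here are local stand-ins, not the header's):

#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 u128;

// Direct 128-bit reference, mirroring the HAS_UINT128 branch.
static uint64_t mul_shift_ref(uint64_t m, const uint64_t* mul, int32_t j) {
  const u128 b0 = ((u128) m) * mul[0];
  const u128 b2 = ((u128) m) * mul[1];
  return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}

// Two-limb computation with an explicit carry, mirroring the portable branch.
static uint64_t mul_shift_limbs(uint64_t m, const uint64_t* mul, int32_t j) {
  const u128 p1 = ((u128) m) * mul[1];
  const u128 p0 = ((u128) m) * mul[0];
  uint64_t high1 = (uint64_t) (p1 >> 64);
  const uint64_t low1 = (uint64_t) p1;
  const uint64_t high0 = (uint64_t) (p0 >> 64);
  const uint64_t sum = high0 + low1;
  if (sum < high0) {
    ++high1;  // carry into the top limb
  }
  return (uint64_t) (((((u128) high1) << 64) | sum) >> (j - 64));
}

int main(void) {
  // One 128-bit table-style entry, taken from DOUBLE_POW5_INV_SPLIT above.
  const uint64_t mul[2] = { 11068046444225730970ull, 1844674407370955161ull };
  for (uint64_t m = 1; m < (1ull << 55); m += (1ull << 41) + 987654321ull) {
    assert(mul_shift_ref(m, mul, 115) == mul_shift_limbs(m, mul, 115));
  }
  return 0;
}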
