All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dtoa.h
1 // Copyright (C) 2011 Milo Yip
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
22 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
23 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
24 
25 #ifndef RAPIDJSON_DTOA_
26 #define RAPIDJSON_DTOA_
27 
28 #if defined(_MSC_VER)
29 #include <intrin.h>
30 #if defined(_M_AMD64)
31 #pragma intrinsic(_BitScanReverse64)
32 #endif
33 #endif
34 
35 #include "itoa.h" // GetDigitsLut()
36 
37 namespace rapidjson {
38 namespace internal {
39 
40 #ifdef __GNUC__
41 RAPIDJSON_DIAG_PUSH
42 RAPIDJSON_DIAG_OFF(effc++)
43 #endif
44 
45 struct DiyFp {
46  DiyFp() {}
47 
48  DiyFp(uint64_t f, int e) : f(f), e(e) {}
49 
50  DiyFp(double d) {
51  union {
52  double d;
53  uint64_t u64;
54  } u = { d };
55 
56  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
57  uint64_t significand = (u.u64 & kDpSignificandMask);
58  if (biased_e != 0) {
59  f = significand + kDpHiddenBit;
60  e = biased_e - kDpExponentBias;
61  }
62  else {
63  f = significand;
64  e = kDpMinExponent + 1;
65  }
66  }
67 
68  DiyFp operator-(const DiyFp& rhs) const {
69  return DiyFp(f - rhs.f, e);
70  }
71 
72  DiyFp operator*(const DiyFp& rhs) const {
73 #if defined(_MSC_VER) && defined(_M_AMD64)
74  uint64_t h;
75  uint64_t l = _umul128(f, rhs.f, &h);
76  if (l & (uint64_t(1) << 63)) // rounding
77  h++;
78  return DiyFp(h, e + rhs.e + 64);
79 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
80  unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
81  uint64_t h = static_cast<uint64_t>(p >> 64);
82  uint64_t l = static_cast<uint64_t>(p);
83  if (l & (uint64_t(1) << 63)) // rounding
84  h++;
85  return DiyFp(h, e + rhs.e + 64);
86 #else
87  const uint64_t M32 = 0xFFFFFFFF;
88  const uint64_t a = f >> 32;
89  const uint64_t b = f & M32;
90  const uint64_t c = rhs.f >> 32;
91  const uint64_t d = rhs.f & M32;
92  const uint64_t ac = a * c;
93  const uint64_t bc = b * c;
94  const uint64_t ad = a * d;
95  const uint64_t bd = b * d;
96  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
97  tmp += 1U << 31; /// mult_round
98  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
99 #endif
100  }
101 
102  DiyFp Normalize() const {
103 #if defined(_MSC_VER) && defined(_M_AMD64)
104  unsigned long index;
105  _BitScanReverse64(&index, f);
106  return DiyFp(f << (63 - index), e - (63 - index));
107 #elif defined(__GNUC__)
108  int s = __builtin_clzll(f);
109  return DiyFp(f << s, e - s);
110 #else
111  DiyFp res = *this;
112  while (!(res.f & kDpHiddenBit)) {
113  res.f <<= 1;
114  res.e--;
115  }
116  res.f <<= (kDiySignificandSize - kDpSignificandSize - 1);
117  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
118  return res;
119 #endif
120  }
121 
122  DiyFp NormalizeBoundary() const {
123 #if defined(_MSC_VER) && defined(_M_AMD64)
124  unsigned long index;
125  _BitScanReverse64(&index, f);
126  return DiyFp (f << (63 - index), e - (63 - index));
127 #else
128  DiyFp res = *this;
129  while (!(res.f & (kDpHiddenBit << 1))) {
130  res.f <<= 1;
131  res.e--;
132  }
133  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
134  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
135  return res;
136 #endif
137  }
138 
139  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
140  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
141  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
142  mi.f <<= mi.e - pl.e;
143  mi.e = pl.e;
144  *plus = pl;
145  *minus = mi;
146  }
147 
148  static const int kDiySignificandSize = 64;
149  static const int kDpSignificandSize = 52;
150  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
151  static const int kDpMinExponent = -kDpExponentBias;
152  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
153  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
154  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
155 
156  uint64_t f;
157  int e;
158 };
159 
160 inline DiyFp GetCachedPower(int e, int* K) {
161  // 10^-348, 10^-340, ..., 10^340
162  static const uint64_t kCachedPowers_F[] = {
163  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
164  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
165  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
166  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
167  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
168  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
169  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
170  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
171  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
172  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
173  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
174  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
175  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
176  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
177  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
178  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
179  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
180  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
181  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
182  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
183  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
184  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
185  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
186  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
187  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
188  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
189  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
190  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
191  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
192  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
193  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
194  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
195  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
196  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
197  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
198  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
199  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
200  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
201  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
202  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
203  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
204  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
205  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
206  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
207  };
208  static const int16_t kCachedPowers_E[] = {
209  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
210  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
211  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
212  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
213  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
214  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
215  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
216  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
217  907, 933, 960, 986, 1013, 1039, 1066
218  };
219 
220  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
221  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
222  int k = static_cast<int>(dk);
223  if (k != dk)
224  k++;
225 
226  unsigned index = static_cast<unsigned>((k >> 3) + 1);
227  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
228 
229  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
230 }
231 
232 inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
233  while (rest < wp_w && delta - rest >= ten_kappa &&
234  (rest + ten_kappa < wp_w || /// closer
235  wp_w - rest > rest + ten_kappa - wp_w)) {
236  buffer[len - 1]--;
237  rest += ten_kappa;
238  }
239 }
240 
241 inline unsigned CountDecimalDigit32(uint32_t n) {
242  // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
243  if (n < 10) return 1;
244  if (n < 100) return 2;
245  if (n < 1000) return 3;
246  if (n < 10000) return 4;
247  if (n < 100000) return 5;
248  if (n < 1000000) return 6;
249  if (n < 10000000) return 7;
250  if (n < 100000000) return 8;
251  if (n < 1000000000) return 9;
252  return 10;
253 }
254 
255 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
256  static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
257  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
258  const DiyFp wp_w = Mp - W;
259  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
260  uint64_t p2 = Mp.f & (one.f - 1);
261  int kappa = CountDecimalDigit32(p1);
262  *len = 0;
263 
264  while (kappa > 0) {
265  uint32_t d;
266  switch (kappa) {
267  case 10: d = p1 / 1000000000; p1 %= 1000000000; break;
268  case 9: d = p1 / 100000000; p1 %= 100000000; break;
269  case 8: d = p1 / 10000000; p1 %= 10000000; break;
270  case 7: d = p1 / 1000000; p1 %= 1000000; break;
271  case 6: d = p1 / 100000; p1 %= 100000; break;
272  case 5: d = p1 / 10000; p1 %= 10000; break;
273  case 4: d = p1 / 1000; p1 %= 1000; break;
274  case 3: d = p1 / 100; p1 %= 100; break;
275  case 2: d = p1 / 10; p1 %= 10; break;
276  case 1: d = p1; p1 = 0; break;
277  default:
278 #if defined(_MSC_VER)
279  __assume(0);
280 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
281  __builtin_unreachable();
282 #else
283  d = 0;
284 #endif
285  }
286  if (d || *len)
287  buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d));
288  kappa--;
289  uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
290  if (tmp <= delta) {
291  *K += kappa;
292  GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
293  return;
294  }
295  }
296 
297  // kappa = 0
298  for (;;) {
299  p2 *= 10;
300  delta *= 10;
301  char d = static_cast<char>(p2 >> -one.e);
302  if (d || *len)
303  buffer[(*len)++] = static_cast<char>('0' + d);
304  p2 &= one.f - 1;
305  kappa--;
306  if (p2 < delta) {
307  *K += kappa;
308  GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
309  return;
310  }
311  }
312 }
313 
314 inline void Grisu2(double value, char* buffer, int* length, int* K) {
315  const DiyFp v(value);
316  DiyFp w_m, w_p;
317  v.NormalizedBoundaries(&w_m, &w_p);
318 
319  const DiyFp c_mk = GetCachedPower(w_p.e, K);
320  const DiyFp W = v.Normalize() * c_mk;
321  DiyFp Wp = w_p * c_mk;
322  DiyFp Wm = w_m * c_mk;
323  Wm.f++;
324  Wp.f--;
325  DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
326 }
327 
328 inline char* WriteExponent(int K, char* buffer) {
329  if (K < 0) {
330  *buffer++ = '-';
331  K = -K;
332  }
333 
334  if (K >= 100) {
335  *buffer++ = static_cast<char>('0' + static_cast<char>(K / 100));
336  K %= 100;
337  const char* d = GetDigitsLut() + K * 2;
338  *buffer++ = d[0];
339  *buffer++ = d[1];
340  }
341  else if (K >= 10) {
342  const char* d = GetDigitsLut() + K * 2;
343  *buffer++ = d[0];
344  *buffer++ = d[1];
345  }
346  else
347  *buffer++ = static_cast<char>('0' + static_cast<char>(K));
348 
349  return buffer;
350 }
351 
352 inline char* Prettify(char* buffer, int length, int k) {
353  const int kk = length + k; // 10^(kk-1) <= v < 10^kk
354 
355  if (length <= kk && kk <= 21) {
356  // 1234e7 -> 12340000000
357  for (int i = length; i < kk; i++)
358  buffer[i] = '0';
359  buffer[kk] = '.';
360  buffer[kk + 1] = '0';
361  return &buffer[kk + 2];
362  }
363  else if (0 < kk && kk <= 21) {
364  // 1234e-2 -> 12.34
365  std::memmove(&buffer[kk + 1], &buffer[kk], length - kk);
366  buffer[kk] = '.';
367  return &buffer[length + 1];
368  }
369  else if (-6 < kk && kk <= 0) {
370  // 1234e-6 -> 0.001234
371  const int offset = 2 - kk;
372  std::memmove(&buffer[offset], &buffer[0], length);
373  buffer[0] = '0';
374  buffer[1] = '.';
375  for (int i = 2; i < offset; i++)
376  buffer[i] = '0';
377  return &buffer[length + offset];
378  }
379  else if (length == 1) {
380  // 1e30
381  buffer[1] = 'e';
382  return WriteExponent(kk - 1, &buffer[2]);
383  }
384  else {
385  // 1234e30 -> 1.234e33
386  std::memmove(&buffer[2], &buffer[1], length - 1);
387  buffer[1] = '.';
388  buffer[length + 1] = 'e';
389  return WriteExponent(kk - 1, &buffer[0 + length + 2]);
390  }
391 }
392 
393 inline char* dtoa(double value, char* buffer) {
394  if (value == 0) {
395  buffer[0] = '0';
396  buffer[1] = '.';
397  buffer[2] = '0';
398  return &buffer[3];
399  }
400  else {
401  if (value < 0) {
402  *buffer++ = '-';
403  value = -value;
404  }
405  int length, K;
406  Grisu2(value, buffer, &length, &K);
407  return Prettify(buffer, length, K);
408  }
409 }
410 
411 #ifdef __GNUC__
412 RAPIDJSON_DIAG_POP
413 #endif
414 
415 } // namespace internal
416 } // namespace rapidjson
417 
418 #endif // RAPIDJSON_DTOA_
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:186
main RapidJSON namespace
Definition: rapidjson.h:241