|
1 | | -// author : xjb |
2 | | -// src : github.com/xjb714/xjb |
3 | | -// date : 2026.2.2 |
4 | | - |
5 | | -// todo : big-endian support, msvc support, optimize for performance, add |
6 | | -// comments, reduce code size, etc. |
| 1 | +// Copyright 2026 xjb714 and contributors |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
7 | 14 |
|
8 | 15 | #include "ftoa.h" |
9 | 16 |
|
@@ -325,19 +332,20 @@ static const struct const_value_float constants_float = { |
325 | 332 | .m32_4 = {0x147b000, -100 + 0x10000, 0xce0, -10 + 0x100}, |
326 | 333 | }; |
327 | 334 |
|
| 335 | +// size: 17728 with 32-byte rows (18432 when XJB_NO_MEMMOVE selects 64-byte rows), align: 64 |
328 | 336 | struct double_table_t { |
329 | 337 | static constexpr int e10_DN = -4; |
330 | 338 | static constexpr int e10_UP = 15; |
331 | 339 | static constexpr int max_dec_sig_len = 17; |
332 | 340 | static constexpr int num_pow10 = 323 - (-293) + 1; |
333 | | - uint64_t pow10_double[(323 - (-293) + 1) * 2] = {}; |
334 | | - uint64_t exp_result_double[324 + 308 + 1] = {}; |
335 | | - alignas(64) unsigned char e10_variable_data[e10_UP - e10_DN + 1 + 1][64] = {}; |
| 341 | + uint64_t pow10_double[(323 - (-293) + 1) * 2] = {}; // 1234 * 8 = 9872 bytes |
| 342 | + uint64_t exp_result_double[324 + 308 + 1] = {}; // 633 * 8 = 5064 bytes |
| 343 | + alignas(64) unsigned char e10_variable_data[e10_UP - e10_DN + 1 + 1][XJB_NO_MEMMOVE ? 64 : 32] = {}; |
336 | 344 | unsigned char h7[2048] = {}; |
337 | 345 |
|
338 | | - // uint8_t shuffle_table[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, |
339 | | - // 13, 14, 15, 0}; uint8_t shuffle_table_big_endian[17] = {0, 7, 6, 5, 4, 3, |
340 | | - // 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; |
| 346 | + /* Assert that even the smaller (32-byte) row of e10_variable_data is large enough. */ |
| 347 | + static_assert(32 >= max_dec_sig_len + 5, ""); |
| 348 | + |
341 | 349 | constexpr double_table_t() { |
342 | 350 | struct uint192 { |
343 | 351 | uint64_t w0, w1, w2; |
@@ -393,16 +401,18 @@ struct double_table_t { |
393 | 401 | : (dec_sig_len + 1 - (dec_sig_len == 1))); |
394 | 402 | e10_variable_data[tmp_data_ofs][dec_sig_len - 1] = exp_pos; |
395 | 403 | } |
396 | | - uint8_t v = 0xf; |
397 | | - for (uint64_t j = 0; j < 16; ++j) |
398 | | - e10_variable_data[tmp_data_ofs][32 + 16 + j] = v--; |
399 | | - if (move_pos > dot_pos) { |
400 | | - for (uint64_t j = 15; j > dot_pos && j > 0; --j) |
401 | | - e10_variable_data[tmp_data_ofs][j + 32 + 16] = e10_variable_data[tmp_data_ofs][j + 32 + 16 - 1]; |
402 | | - } |
403 | | - for (uint64_t j = 0; j < 16; ++j) { |
404 | | - auto v = e10_variable_data[tmp_data_ofs][j + 32 + 16]; |
405 | | - e10_variable_data[tmp_data_ofs][j + 32] = v ? (v - 1) : 15; |
| 404 | + if (XJB_NO_MEMMOVE) { |
| 405 | + uint8_t v = 0xf; |
| 406 | + for (uint64_t j = 0; j < 16; ++j) |
| 407 | + e10_variable_data[tmp_data_ofs][32 + 16 + j] = v--; |
| 408 | + if (move_pos > dot_pos) { |
| 409 | + for (uint64_t j = 15; j > dot_pos && j > 0; --j) |
| 410 | + e10_variable_data[tmp_data_ofs][j + 32 + 16] = e10_variable_data[tmp_data_ofs][j + 32 + 16 - 1]; |
| 411 | + } |
| 412 | + for (uint64_t j = 0; j < 16; ++j) { |
| 413 | + auto v = e10_variable_data[tmp_data_ofs][j + 32 + 16]; |
| 414 | + e10_variable_data[tmp_data_ofs][j + 32] = v ? (v - 1) : 15; |
| 415 | + } |
406 | 416 | } |
407 | 417 | } |
408 | 418 | for (int exp = 0; exp < 2048; ++exp) { |
@@ -440,22 +450,31 @@ struct const_value_double { |
440 | 450 | int32_t multipliers32[4] = {0x68db8bb, -10000 + 0x10000, 0x147b000, -100 + 0x10000}; // 16 |
441 | 451 | int16_t multipliers16[8] = {0xce0, -10 + 0x100, '0' + '0' * 256}; // 16 |
442 | 452 | #endif |
| 453 | +#if XJB_USE_NEON |
443 | 454 | uint8_t shuffle_table_neon[32] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, |
444 | 455 | 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7}; |
| 456 | +#endif |
| 457 | +#if XJB_USE_NEON && XJB_NO_MEMMOVE |
445 | 458 | uint8_t reverse_shuffle_table[17] = {0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; |
| 459 | +#endif |
| 460 | +#if (XJB_NOT_REMOVE_FIRST_ZERO && XJB_USE_SSSE3) || XJB_USE_NEON |
446 | 461 | uint8_t shuffle_table[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}; |
| 462 | +#endif |
447 | 463 | }; |
448 | 464 |
|
449 | | - |
450 | 465 | struct float_table_t { |
451 | 466 | static const int e10_DN = -3; |
452 | 467 | static const int e10_UP = 6; |
453 | 468 | static const int max_dec_sig_len = 9; |
454 | 469 | static const int num_pow10 = 44 - (-32) + 1; |
455 | 470 | uint64_t pow10_float_reverse[44 - (-32) + 1] = {}; |
456 | 471 | uint32_t exp_result_float[45 + 38 + 1] = {}; |
457 | | - unsigned char e10_variable_data[e10_UP - (e10_DN) + 1 + 1][1 ? 16 : max_dec_sig_len + 3] = {}; |
| 472 | + unsigned char e10_variable_data[e10_UP - (e10_DN) + 1 + 1][16] = {}; |
458 | 473 | unsigned char h37[256] = {}; |
| 474 | + |
| 475 | + /* Assert that each 16-byte row of e10_variable_data is large enough. */ |
| 476 | + static_assert(16 >= max_dec_sig_len + 3, ""); |
| 477 | + |
459 | 478 | struct const_value_float constants_float = { |
460 | 479 | #if defined(__aarch64__) |
461 | 480 | .c1 = (((u64)('0' + '0' * 256) << (36)) + (((u64)1 << (36 - 1)) - 7)), |
|
0 commit comments