Skip to content

Commit 24b41c7

Browse files
committed
Upload xjb16 code (not the optimized version)
1 parent 4deec0a commit 24b41c7

8 files changed

Lines changed: 1618 additions & 42 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ other_benchmark_project/bench_16digit_to_ascii/verify
2121
bench/xjb/perf/main_g
2222
bench/xjb/perf/main_c
2323
bench/xjb/perf/main_i
24+
bench/xjb/perf/*.a
25+
bench/xjb/perf/*.o
2426

2527
bench/xjb/*.s
2628
bench/xjb/perf/*.s

bench/xjb/float_to_string/ftoa.cpp

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
// author : xjb
2-
// src : github.com/xjb714/xjb
3-
// date : 2026.2.2
4-
5-
// todo : big-endian support, msvc support, optimize for performance, add
6-
// comments, reduce code size, etc.
1+
// Copyright 2026 xjb714 and contributors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
714

815
#include "ftoa.h"
916

@@ -325,19 +332,20 @@ static const struct const_value_float constants_float = {
325332
.m32_4 = {0x147b000, -100 + 0x10000, 0xce0, -10 + 0x100},
326333
};
327334

335+
// size: 17728, align: 64
328336
struct double_table_t {
329337
static constexpr int e10_DN = -4;
330338
static constexpr int e10_UP = 15;
331339
static constexpr int max_dec_sig_len = 17;
332340
static constexpr int num_pow10 = 323 - (-293) + 1;
333-
uint64_t pow10_double[(323 - (-293) + 1) * 2] = {};
334-
uint64_t exp_result_double[324 + 308 + 1] = {};
335-
alignas(64) unsigned char e10_variable_data[e10_UP - e10_DN + 1 + 1][64] = {};
341+
uint64_t pow10_double[(323 - (-293) + 1) * 2] = {}; // 1234 * 8 = 9872 bytes
342+
uint64_t exp_result_double[324 + 308 + 1] = {}; // 633 * 8 = 5064 bytes
343+
alignas(64) unsigned char e10_variable_data[e10_UP - e10_DN + 1 + 1][XJB_NO_MEMMOVE ? 64 : 32] = {};
336344
unsigned char h7[2048] = {};
337345

338-
// uint8_t shuffle_table[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
339-
// 13, 14, 15, 0}; uint8_t shuffle_table_big_endian[17] = {0, 7, 6, 5, 4, 3,
340-
// 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
346+
/* Assert that each row of e10_variable_data is large enough. */
347+
static_assert(32 >= max_dec_sig_len + 5, "");
348+
341349
constexpr double_table_t() {
342350
struct uint192 {
343351
uint64_t w0, w1, w2;
@@ -393,16 +401,18 @@ struct double_table_t {
393401
: (dec_sig_len + 1 - (dec_sig_len == 1)));
394402
e10_variable_data[tmp_data_ofs][dec_sig_len - 1] = exp_pos;
395403
}
396-
uint8_t v = 0xf;
397-
for (uint64_t j = 0; j < 16; ++j)
398-
e10_variable_data[tmp_data_ofs][32 + 16 + j] = v--;
399-
if (move_pos > dot_pos) {
400-
for (uint64_t j = 15; j > dot_pos && j > 0; --j)
401-
e10_variable_data[tmp_data_ofs][j + 32 + 16] = e10_variable_data[tmp_data_ofs][j + 32 + 16 - 1];
402-
}
403-
for (uint64_t j = 0; j < 16; ++j) {
404-
auto v = e10_variable_data[tmp_data_ofs][j + 32 + 16];
405-
e10_variable_data[tmp_data_ofs][j + 32] = v ? (v - 1) : 15;
404+
if (XJB_NO_MEMMOVE) {
405+
uint8_t v = 0xf;
406+
for (uint64_t j = 0; j < 16; ++j)
407+
e10_variable_data[tmp_data_ofs][32 + 16 + j] = v--;
408+
if (move_pos > dot_pos) {
409+
for (uint64_t j = 15; j > dot_pos && j > 0; --j)
410+
e10_variable_data[tmp_data_ofs][j + 32 + 16] = e10_variable_data[tmp_data_ofs][j + 32 + 16 - 1];
411+
}
412+
for (uint64_t j = 0; j < 16; ++j) {
413+
auto v = e10_variable_data[tmp_data_ofs][j + 32 + 16];
414+
e10_variable_data[tmp_data_ofs][j + 32] = v ? (v - 1) : 15;
415+
}
406416
}
407417
}
408418
for (int exp = 0; exp < 2048; ++exp) {
@@ -440,22 +450,31 @@ struct const_value_double {
440450
int32_t multipliers32[4] = {0x68db8bb, -10000 + 0x10000, 0x147b000, -100 + 0x10000}; // 16
441451
int16_t multipliers16[8] = {0xce0, -10 + 0x100, '0' + '0' * 256}; // 16
442452
#endif
453+
#if XJB_USE_NEON
443454
uint8_t shuffle_table_neon[32] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
444455
6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7};
456+
#endif
457+
#if XJB_USE_NEON && XJB_NO_MEMMOVE
445458
uint8_t reverse_shuffle_table[17] = {0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
459+
#endif
460+
#if (XJB_NOT_REMOVE_FIRST_ZERO && XJB_USE_SSSE3) || XJB_USE_NEON
446461
uint8_t shuffle_table[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};
462+
#endif
447463
};
448464

449-
450465
struct float_table_t {
451466
static const int e10_DN = -3;
452467
static const int e10_UP = 6;
453468
static const int max_dec_sig_len = 9;
454469
static const int num_pow10 = 44 - (-32) + 1;
455470
uint64_t pow10_float_reverse[44 - (-32) + 1] = {};
456471
uint32_t exp_result_float[45 + 38 + 1] = {};
457-
unsigned char e10_variable_data[e10_UP - (e10_DN) + 1 + 1][1 ? 16 : max_dec_sig_len + 3] = {};
472+
unsigned char e10_variable_data[e10_UP - (e10_DN) + 1 + 1][16] = {};
458473
unsigned char h37[256] = {};
474+
475+
/* Assert that each row of e10_variable_data is large enough. */
476+
static_assert(16 >= max_dec_sig_len + 3, "");
477+
459478
struct const_value_float constants_float = {
460479
#if defined(__aarch64__)
461480
.c1 = (((u64)('0' + '0' * 256) << (36)) + (((u64)1 << (36 - 1)) - 7)),

bench/xjb/perf/makefile

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,47 @@
1+
GCC = g++
2+
ICPX = icpx
3+
CLANG = clang++
4+
15

26
# with no-PIC
37
# /////////////////////////////
48
io:
5-
icpx -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
9+
$(ICPX) -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
610
ar rcs libftoa.a ftoa.o
7-
icpx -O3 -march=native main.cpp -o main_i -static -L. -lftoa -fno-PIC
11+
$(ICPX) -O3 -march=native main.cpp -o main_i -static -L. -lftoa -fno-PIC
812
sudo perf stat -d ./main_i
913

1014
go:
11-
g++ -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
15+
$(GCC) -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
1216
ar rcs libftoa.a ftoa.o
13-
g++ -O3 -march=native main.cpp -o main_g -static -L. -lftoa -fno-PIC
17+
$(GCC) -O3 -march=native main.cpp -o main_g -static -L. -lftoa -fno-PIC
1418
sudo perf stat -d ./main_g
1519

1620
co:
17-
clang++ -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
21+
$(CLANG) -O3 -march=native -c -fno-PIC ../float_to_string/ftoa.cpp -o ftoa.o
1822
ar rcs libftoa.a ftoa.o
19-
clang++ -O3 -march=native main.cpp -o main_c -static -L. -lftoa -fno-PIC
23+
$(CLANG) -O3 -march=native main.cpp -o main_c -static -L. -lftoa -fno-PIC
2024
sudo perf stat -d ./main_c
2125
# /////////////////////////////
2226

2327
# with PIC
2428
# /////////////////////////////
2529
ip:
26-
icpx -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
30+
$(ICPX) -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
2731
ar rcs libftoa.a ftoa.o
28-
icpx -O3 -march=native main.cpp -o main_i -static -L. -lftoa
32+
$(ICPX) -O3 -march=native main.cpp -o main_i -static -L. -lftoa
2933
sudo perf stat -d ./main_i
3034

3135
gp:
32-
g++ -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
36+
$(GCC) -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
3337
ar rcs libftoa.a ftoa.o
34-
g++ -O3 -march=native main.cpp -o main_g -static -L. -lftoa
38+
$(GCC) -O3 -march=native main.cpp -o main_g -static -L. -lftoa
3539
sudo perf stat -d ./main_g
3640

3741
cp:
38-
clang++ -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
42+
$(CLANG) -O3 -march=native -c -fPIC ../float_to_string/ftoa.cpp -o ftoa.o
3943
ar rcs libftoa.a ftoa.o
40-
clang++ -O3 -march=native main.cpp -o main_c -static -L. -lftoa
44+
$(CLANG) -O3 -march=native main.cpp -o main_c -static -L. -lftoa
4145
sudo perf stat -d ./main_c
4246
# /////////////////////////////
4347

@@ -48,14 +52,14 @@ cp:
4852
# generate assembly code
4953
# /////////////////////////////
5054
s:
51-
icpx -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_icpx_no_PIC.s -march=native
52-
clang++ -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_clang_no_PIC.s -march=native
53-
g++ -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_gcc_no_PIC.s -march=native
55+
$(ICPX) -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_icpx_no_PIC.s -march=native
56+
$(CLANG) -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_clang_no_PIC.s -march=native
57+
$(GCC) -O3 -std=c++20 -fno-PIC -S ../float_to_string/ftoa.cpp -o ftoa_gcc_no_PIC.s -march=native
5458

5559
spic:
56-
icpx -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_icpx_PIC.s -march=native
57-
clang++ -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_clang_PIC.s -march=native
58-
g++ -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_gcc_PIC.s -march=native
60+
$(ICPX) -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_icpx_PIC.s -march=native
61+
$(CLANG) -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_clang_PIC.s -march=native
62+
$(GCC) -O3 -std=c++20 -fPIC -S ../float_to_string/ftoa.cpp -o ftoa_gcc_PIC.s -march=native
5963
# /////////////////////////////
6064

6165

bench/xjb/test/f16_to_decimal.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,14 @@ int main() {
203203

204204
out << "# bits(hex) d k\n";
205205

206+
uint32_t d_min = 99999999;
207+
uint32_t d_max = 0;
206208
// Iterate over all positive FP16 values (excluding 0x0000 and 0x7C00..0x7FFF)
207209
for (uint32_t bits = 0x0001; bits <= 0x7BFF; ++bits) {
208210
try {
209211
auto [d, k] = f16_to_decimal(static_cast<uint16_t>(bits));
212+
d_min = d > d_min ? d_min : d;
213+
d_max = d > d_max ? d : d_max;
210214
out << "0x" << std::hex << std::uppercase << bits << std::dec
211215
<< " " << (int64_t)d << " " << k << "\n";
212216
} catch (const std::invalid_argument&) {
@@ -220,5 +224,6 @@ int main() {
220224

221225
out.close();
222226
std::cout << "Results written to f16_decimal_results.txt" << std::endl;
227+
printf("d_min = %u, d_max = %u\n",d_min,d_max);
223228
return 0;
224229
}

0 commit comments

Comments
 (0)