Skip to content

Commit 3dd9bb8

Browse files
committed
update ftoa.cpp remove outdated file
1 parent b41adb5 commit 3dd9bb8

9 files changed

Lines changed: 958 additions & 5208 deletions

File tree

bench/xjb/float_to_string/ftoa.cpp

Lines changed: 159 additions & 154 deletions
Large diffs are not rendered by default.

bench/xjb/float_to_string/ftoa_nommove.cpp

Lines changed: 0 additions & 1710 deletions
This file was deleted.

bench/xjb/float_to_string/ftoa_spilt.cpp

Lines changed: 0 additions & 1610 deletions
This file was deleted.

bench/xjb/float_to_string/ftoa_v2.cpp

Lines changed: 0 additions & 1639 deletions
This file was deleted.

bench/xjb/test/bench_f16.cpp

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
#include <algorithm>
2+
#include <chrono>
3+
#include <cstdint>
4+
#include <cstring>
5+
#include <fstream>
6+
#include <iomanip>
7+
#include <iostream>
8+
#include <random>
9+
#include <sstream>
10+
#include <string>
11+
12+
// ==================== 128位分数类(同前) ====================
13+
struct Fraction {
14+
__int128 num, den;
15+
16+
Fraction(__int128 n = 0, __int128 d = 1) : num(n), den(d) {
17+
if (den < 0) {
18+
num = -num;
19+
den = -den;
20+
}
21+
reduce();
22+
}
23+
24+
void reduce() {
25+
if (den == 0)
26+
throw std::runtime_error("Denominator zero");
27+
__int128 g = gcd(num < 0 ? -num : num, den);
28+
num /= g;
29+
den /= g;
30+
}
31+
32+
static __int128 gcd(__int128 a, __int128 b) {
33+
while (b) {
34+
__int128 t = b;
35+
b = a % b;
36+
a = t;
37+
}
38+
return a < 0 ? -a : a;
39+
}
40+
41+
bool operator<(const Fraction& rhs) const { return num * rhs.den < rhs.num * den; }
42+
bool operator<=(const Fraction& rhs) const { return num * rhs.den <= rhs.num * den; }
43+
bool operator==(const Fraction& rhs) const { return num == rhs.num && den == rhs.den; }
44+
bool operator!=(const Fraction& rhs) const { return !(*this == rhs); }
45+
bool operator>(const Fraction& rhs) const { return rhs < *this; }
46+
bool operator>=(const Fraction& rhs) const { return rhs <= *this; }
47+
48+
Fraction operator+(const Fraction& rhs) const { return Fraction(num * rhs.den + rhs.num * den, den * rhs.den); }
49+
Fraction operator-(const Fraction& rhs) const { return Fraction(num * rhs.den - rhs.num * den, den * rhs.den); }
50+
Fraction operator*(const Fraction& rhs) const { return Fraction(num * rhs.num, den * rhs.den); }
51+
Fraction operator/(const Fraction& rhs) const { return Fraction(num * rhs.den, den * rhs.num); }
52+
53+
__int128 floor() const {
54+
if (num >= 0)
55+
return num / den;
56+
else
57+
return (num - den + 1) / den;
58+
}
59+
60+
Fraction frac() const { return *this - Fraction(floor(), 1); }
61+
};
62+
63+
Fraction pow(int base, int exp) {
64+
if (exp == 0)
65+
return Fraction(1);
66+
if (exp < 0)
67+
return Fraction(1) / pow(base, -exp);
68+
__int128 result = 1;
69+
for (int i = 0; i < exp; ++i)
70+
result *= base;
71+
return Fraction(result);
72+
}
73+
74+
// ==================== FP16 解析(同前) ====================
75+
struct FP16Components {
76+
uint16_t bits;
77+
__int128 c;
78+
int q;
79+
bool is_regular;
80+
bool is_irregular;
81+
};
82+
83+
FP16Components f16_to_components(uint16_t bits) {
84+
int exp = (bits >> 10) & 0x1F;
85+
int frac = bits & 0x3FF;
86+
87+
FP16Components comp;
88+
comp.bits = bits;
89+
if (exp == 0) {
90+
comp.c = frac;
91+
comp.q = -24;
92+
comp.is_regular = true;
93+
comp.is_irregular = false;
94+
} else {
95+
comp.c = 1024 + frac;
96+
comp.q = exp - 25;
97+
comp.is_regular = (frac != 0);
98+
comp.is_irregular = (frac == 0);
99+
}
100+
return comp;
101+
}
102+
103+
int compute_k(int q, bool is_regular) {
104+
int k;
105+
if (is_regular) {
106+
k = (q * 1233) >> 12;
107+
} else {
108+
k = ((q * 1233) - 512) >> 12;
109+
}
110+
return k;
111+
}
112+
113+
std::pair<uint32_t, int> f16_to_decimal(uint16_t bits) {
114+
auto comp = f16_to_components(bits);
115+
__int128 c = comp.c;
116+
int q = comp.q;
117+
bool is_regular = comp.is_regular;
118+
bool is_irregular = comp.is_irregular;
119+
120+
int k = compute_k(q, is_regular);
121+
Fraction v = Fraction(c) * pow(2, q);
122+
Fraction R = v * pow(10, -k - 1);
123+
__int128 m = R.floor();
124+
Fraction n = R.frac();
125+
126+
__int128 ten = 10 * m;
127+
Fraction tenR = v * pow(10, -k);
128+
Fraction ten_n = tenR - Fraction(ten);
129+
__int128 floor_ten_n = ten_n.floor();
130+
Fraction delta = ten_n.frac();
131+
132+
__int128 one;
133+
if (delta == Fraction(1, 2)) {
134+
if (floor_ten_n % 2 == 0)
135+
one = floor_ten_n;
136+
else
137+
one = floor_ten_n + 1;
138+
} else if (delta < Fraction(1, 2)) {
139+
one = floor_ten_n;
140+
} else {
141+
one = floor_ten_n + 1;
142+
}
143+
Fraction A = pow(2, q - 1) * pow(10, -k - 1);
144+
if (is_irregular) {
145+
Fraction cond1 = pow(2, q - 2) * pow(10, -k);
146+
Fraction cond2 = pow(2, q - 2) * pow(10, -k - 1);
147+
if (delta > cond1)
148+
one = floor_ten_n + 1;
149+
if (cond2 >= n)
150+
one = 0;
151+
} else {
152+
if (A > n || (A == n && c % 2 == 0))
153+
one = 0;
154+
}
155+
if (A > (Fraction(1) - n) || (A == (Fraction(1) - n) && c % 2 == 0))
156+
one = 10;
157+
__int128 d = ten + one;
158+
return {static_cast<uint32_t>(d), k};
159+
}
160+
161+
// ==================== BCD 辅助 ====================
162+
static inline uint32_t to_bcd4(uint32_t d) {
163+
// convert binary to little_endian bcd;
164+
// require d range [0,9999];
165+
uint64_t abcd = d;
166+
uint64_t ab_cd = (abcd << 16) + (1 - (100 << 16)) * (((abcd * 0x147b) >> 19));
167+
uint64_t a_b_c_d = (ab_cd << 8) + (1 - (10 << 8)) * (((ab_cd * 0x67) >> 10) & 0x000f000f);
168+
return static_cast<uint32_t>(a_b_c_d);
169+
}
170+
171+
// ==================== xjb16 核心转换函数 ====================
172+
char* xjb16(uint16_t bits, char* buf) {
173+
// same result as str(numpy.float16(v)) in python.
174+
// v has 16bits binary representation = bits
175+
176+
// 16 = 1 + 5 + 10; sign + exp + sig
177+
*buf = '-';
178+
buf += bits >> (10 + 5);
179+
uint32_t exp = (bits >> 10) & ((1 << 5) - 1);
180+
uint32_t sig = bits & ((1 << 10) - 1);
181+
uint32_t sig_bin = sig | (1 << 10);
182+
int32_t exp_bin = exp - ((1 << 4) - 1) - 10;
183+
if (exp == 0) [[unlikely]] {
184+
if (sig == 0)
185+
return (char*)memcpy(buf, "0.0", 4) + 3;
186+
exp_bin = 1 - ((1 << 4) - 1) - 10;
187+
sig_bin = sig;
188+
}
189+
if (exp == 31) [[unlikely]]
190+
return (char*)memcpy(buf, sig ? "nan" : "inf", 4) + 3;
191+
192+
uint16_t bits_abs = bits & ((1 << 15) - 1);
193+
auto [d, k] = f16_to_decimal(bits_abs);
194+
int tz = -1;
195+
int mul = 1;
196+
int d_div = d;
197+
while (d_div * mul == d) {
198+
d_div /= 10;
199+
mul *= 10;
200+
tz++;
201+
}
202+
203+
// len range = [1, 5];
204+
// d range = [6, 19990];
205+
// f16 min = 6e-8; max is 65500
206+
uint32_t len = 1 + (d >= 10) + (d >= 100) + (d >= 1000) + (d >= 10000);
207+
memcpy(buf, "00000000", 8);
208+
uint32_t d_rem = (d >= 10000) ? (d - 10000) : d;
209+
210+
int len_2 = len;
211+
while (len_2 < 4) {
212+
len_2++;
213+
d_rem *= 10;
214+
}
215+
216+
uint32_t bcd = to_bcd4(d_rem);
217+
uint32_t ascii = bcd | 0x30303030;
218+
uint32_t dec_sig_len = len - tz; // [1, 5];
219+
220+
const int FIXED_MIN = -4;
221+
const int FIXED_MAX = 2;
222+
223+
int e10 = k + len - 1;
224+
uint32_t first_sig_pos = (FIXED_MIN <= e10 && e10 <= -1) ? 1 - e10 : 0;
225+
uint32_t dot_pos = (0 <= e10 && e10 <= FIXED_MAX) ? 1 + e10 : 1;
226+
uint32_t move_pos = dot_pos + ((0 <= e10 || e10 < FIXED_MIN));
227+
228+
uint32_t exp_pos = (FIXED_MIN <= e10 && e10 <= -1)
229+
? dec_sig_len
230+
: (0 <= e10 && e10 <= FIXED_MAX ? (e10 + 3 > dec_sig_len + 1 ? e10 + 3 : dec_sig_len + 1)
231+
: (dec_sig_len + 1 - (dec_sig_len == 1)));
232+
char* buf_origin = buf;
233+
234+
buf += first_sig_pos;
235+
*buf = '1';
236+
memcpy(buf + (d >= 10000), &ascii, 4);
237+
238+
memmove(&buf[move_pos], &buf[dot_pos], 8); // the index (first_sig_pos + dot_pos + sign) max = 7+1=8,
239+
buf_origin[dot_pos] = '.';
240+
buf += exp_pos;
241+
242+
// comput exp ascii;
243+
// only negative value print exp_result;
244+
int e10_neg = e10 < 0;
245+
uint32_t e10_abs = e10_neg ? -e10 : e10;
246+
uint32_t exp_result =
247+
'e' + ((e10_neg ? (uint32_t)'-' : (uint32_t)'+') << 8) + ((uint32_t)'0' << 16) + ((e10_abs + '0') << 24);
248+
if (FIXED_MIN <= e10 && e10 <= FIXED_MAX)
249+
exp_result = 0;
250+
uint32_t exp_len = (FIXED_MIN <= e10 && e10 <= FIXED_MAX) ? 0 : 4;
251+
memcpy(buf, &exp_result, 4);
252+
return buf + exp_len;
253+
}
254+
255+
// ==================== 性能测试主程序 ====================
256+
int main() {
257+
// 生成 N = 2^20 个随机 FP16 值,排除 inf 和 nan
258+
constexpr size_t N = 1 << 20;
259+
std::vector<uint16_t> values;
260+
values.reserve(N);
261+
262+
std::random_device rd;
263+
std::mt19937 gen(rd());
264+
std::uniform_int_distribution<uint16_t> dist(0, 65535);
265+
266+
while (values.size() < N) {
267+
uint16_t bits = dist(gen);
268+
uint16_t exp = (bits >> 10) & 0x1F;
269+
if (exp != 31) { // 排除 exp == 31 (inf/nan)
270+
values.push_back(bits);
271+
}
272+
}
273+
274+
// 缓冲区复用,避免多次分配栈内存影响测量(测量的是转换逻辑本身)
275+
char buffer[32]; // 足够大,xjb16 最多写十几个字符
276+
277+
// 预热:调用一次确保缓存和分支预测稳定
278+
(void)xjb16(values[0], buffer);
279+
280+
// 开始计时
281+
auto start = std::chrono::high_resolution_clock::now();
282+
283+
// 执行 N 次转换,并将结果通过 volatile 防止优化
284+
volatile char sink = 0;
285+
for (uint16_t bits : values) {
286+
char* end = xjb16(bits, buffer);
287+
// 强制编译器认为结果被使用(防止循环被完全优化掉)
288+
sink += buffer[0] + (end - buffer);
289+
}
290+
291+
auto end = std::chrono::high_resolution_clock::now();
292+
auto duration_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
293+
double avg_ns = (duration_us * 1000.0) / N;
294+
295+
// 输出结果
296+
std::cout << "Total operations: " << N << "\n";
297+
std::cout << "Total time: " << duration_us << " us\n";
298+
std::cout << "Average time per call: " << avg_ns << " ns\n";
299+
300+
return 0;
301+
}

bench/xjb/test/f16_to_string.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ struct FP16Components {
8282
FP16Components f16_to_components(uint16_t bits) {
8383
int exp = (bits >> 10) & 0x1F;
8484
int frac = bits & 0x3FF;
85-
if (exp == 0 && frac == 0)
86-
throw std::invalid_argument("+0 excluded");
87-
if (exp == 31)
88-
throw std::invalid_argument("inf/NaN excluded");
85+
// if (exp == 0 && frac == 0)
86+
// throw std::invalid_argument("+0 excluded");
87+
// if (exp == 31)
88+
// throw std::invalid_argument("inf/NaN excluded");
8989

9090
FP16Components comp;
9191
comp.bits = bits;
@@ -265,6 +265,9 @@ static inline uint32_t to_bcd4(uint32_t d) {
265265
}
266266

267267
char* xjb16(uint16_t bits, char* buf) {
268+
// same result as str(numpy.float16(v)) in python.
269+
// v has 16bits binary representation = bits
270+
268271
// 16 = 1 + 5 + 10; sign + exp + sig
269272
*buf = '-';
270273
buf += bits >> (10 + 5);

0 commit comments

Comments
 (0)