Do not use no_memmove for arm64

xjb714 · xjb714 · commit 2fe5644bbb95 · 2026-04-28T14:00:58.000+08:00
diff --git a/bench/xjb/float_to_string/ftoa.cpp b/bench/xjb/float_to_string/ftoa.cpp
@@ -104,7 +104,7 @@
 #        define XJB_NO_MEMMOVE 0
 #    elif XJB_USE_NEON
 // On other aarch64 implementations the performance characteristics of memmove may differ.
-#        define XJB_NO_MEMMOVE 1
+#        define XJB_NO_MEMMOVE 0
 #    else
 #        define XJB_NO_MEMMOVE 0
 #    endif
diff --git a/bench/xjb/perf/main.cpp b/bench/xjb/perf/main.cpp
@@ -24,7 +24,7 @@ const u64 N = (1ull << 25); // data size
 #else
     double *data;
 #endif
-u64 get_cycle()
+static u64 get_cycle()
 {
 #ifdef __amd64__
     uint64_t low, high;
@@ -34,13 +34,13 @@ u64 get_cycle()
     return 0;
 #endif
 }
-auto getns()
+static auto getns()
 {
     auto now = std::chrono::high_resolution_clock::now();
     auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();
     return nanos;
 }
-double gen_double_filter_NaN_Inf()
+static double gen_double_filter_NaN_Inf()
 {
     unsigned long long rnd,rnd_abs;
     do{
@@ -50,7 +50,7 @@ double gen_double_filter_NaN_Inf()
     while (rnd_abs >= (0x7ffull << 52)); // nan or inf
     return *(double *)&rnd;
 }
-double gen_double_filter_NaN_Inf_subnormal()
+static double gen_double_filter_NaN_Inf_subnormal()
 {
     unsigned long long rnd,rnd_abs;
     do{
@@ -60,7 +60,7 @@ double gen_double_filter_NaN_Inf_subnormal()
     while (rnd_abs >= (0x7ffull << 52) && rnd_abs < (1ull << 52) ); // nan or inf or subnormal
     return *(double *)&rnd;
 }
-float gen_float_filter_NaN_Inf()
+static float gen_float_filter_NaN_Inf()
 {
     unsigned int rnd,rnd_abs;
     do{
@@ -70,7 +70,7 @@ float gen_float_filter_NaN_Inf()
     while (rnd_abs >= (0xffu << 23)); // nan or inf
     return *(float *)&rnd;
 }
-float gen_float_filter_NaN_Inf_subnormal()
+static float gen_float_filter_NaN_Inf_subnormal()
 {
     unsigned int rnd,rnd_abs;
     do{
@@ -80,7 +80,7 @@ float gen_float_filter_NaN_Inf_subnormal()
     while (rnd_abs >= (0xffu << 23) && rnd_abs < (1u << 23) ); // nan or inf or subnormal
     return *(float *)&rnd;
 }
-void init_data()
+static void init_data()
 {
 #if PERF_DOUBLE_OR_FLOAT == FLOAT
     data = new float[N];
diff --git a/bench/xjb/perf/makefile b/bench/xjb/perf/makefile
@@ -45,9 +45,9 @@ cp:
 	sudo perf stat -d ./main_c
 # /////////////////////////////
 
-
-
-
+mca: # machine code analysis
+	clang++ -O3 -std=c++20 ../float_to_string/ftoa.cpp -S -o ftoa.s -march=native
+	llvm-mca -march=aarch64 --all-stats ./ftoa.s > mca.txt --iterations=1000 --timeline --bottleneck-analysis
 
 # generate assembly code
 # /////////////////////////////
diff --git a/src/ftoa.cpp b/src/ftoa.cpp
@@ -104,7 +104,7 @@
 #        define XJB_NO_MEMMOVE 0
 #    elif XJB_USE_NEON
 // On other aarch64 implementations the performance characteristics of memmove may differ.
-#        define XJB_NO_MEMMOVE 1
+#        define XJB_NO_MEMMOVE 0
 #    else
 #        define XJB_NO_MEMMOVE 0
 #    endif

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ const u64 N = (1ull << 25); // data size`
`24`	`24`	`#else`
`25`	`25`	`double *data;`
`26`	`26`	`#endif`
`27`		`-u64 get_cycle()`
	`27`	`+static u64 get_cycle()`
`28`	`28`	`{`
`29`	`29`	`#ifdef __amd64__`
`30`	`30`	`uint64_t low, high;`
`@@ -34,13 +34,13 @@ u64 get_cycle()`
`34`	`34`	`return 0;`
`35`	`35`	`#endif`
`36`	`36`	`}`
`37`		`-auto getns()`
	`37`	`+static auto getns()`
`38`	`38`	`{`
`39`	`39`	`auto now = std::chrono::high_resolution_clock::now();`
`40`	`40`	`auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();`
`41`	`41`	`return nanos;`
`42`	`42`	`}`
`43`		`-double gen_double_filter_NaN_Inf()`
	`43`	`+static double gen_double_filter_NaN_Inf()`
`44`	`44`	`{`
`45`	`45`	`unsigned long long rnd,rnd_abs;`
`46`	`46`	`do{`
`@@ -50,7 +50,7 @@ double gen_double_filter_NaN_Inf()`
`50`	`50`	`while (rnd_abs >= (0x7ffull << 52)); // nan or inf`
`51`	`51`	`return *(double *)&rnd;`
`52`	`52`	`}`
`53`		`-double gen_double_filter_NaN_Inf_subnormal()`
	`53`	`+static double gen_double_filter_NaN_Inf_subnormal()`
`54`	`54`	`{`
`55`	`55`	`unsigned long long rnd,rnd_abs;`
`56`	`56`	`do{`
`@@ -60,7 +60,7 @@ double gen_double_filter_NaN_Inf_subnormal()`
`60`	`60`	`while (rnd_abs >= (0x7ffull << 52) && rnd_abs < (1ull << 52) ); // nan or inf or subnormal`
`61`	`61`	`return *(double *)&rnd;`
`62`	`62`	`}`
`63`		`-float gen_float_filter_NaN_Inf()`
	`63`	`+static float gen_float_filter_NaN_Inf()`
`64`	`64`	`{`
`65`	`65`	`unsigned int rnd,rnd_abs;`
`66`	`66`	`do{`
`@@ -70,7 +70,7 @@ float gen_float_filter_NaN_Inf()`
`70`	`70`	`while (rnd_abs >= (0xffu << 23)); // nan or inf`
`71`	`71`	`return *(float *)&rnd;`
`72`	`72`	`}`
`73`		`-float gen_float_filter_NaN_Inf_subnormal()`
	`73`	`+static float gen_float_filter_NaN_Inf_subnormal()`
`74`	`74`	`{`
`75`	`75`	`unsigned int rnd,rnd_abs;`
`76`	`76`	`do{`
`@@ -80,7 +80,7 @@ float gen_float_filter_NaN_Inf_subnormal()`
`80`	`80`	`while (rnd_abs >= (0xffu << 23) && rnd_abs < (1u << 23) ); // nan or inf or subnormal`
`81`	`81`	`return *(float *)&rnd;`
`82`	`82`	`}`
`83`		`-void init_data()`
	`83`	`+static void init_data()`
`84`	`84`	`{`
`85`	`85`	`#if PERF_DOUBLE_OR_FLOAT == FLOAT`
`86`	`86`	`data = new float[N];`