Merge branch 'bugfix/dl_fft' into 'master'

BlueSkyB · BlueSkyB · commit bda141617958 · 2025-06-10T19:46:21.000+08:00
Bugfix/dl fft

See merge request ai/esp-dl!183
diff --git a/test_apps/dl_fft/main/test_dl_fft.cpp b/test_apps/dl_fft/main/test_dl_fft.cpp
@@ -244,7 +244,7 @@ TEST_CASE("7. test dl fft hp s16", "[dl_fft]")
         fft_input_s16_128, fft_input_s16_256, fft_input_s16_512, fft_input_s16_1024, fft_input_s16_2048};
     const float *output[5] = {fft_output_128, fft_output_256, fft_output_512, fft_output_1024, fft_output_2048};
     int test_nfft[5] = {128, 256, 512, 1024, 2048};
-    float target_db = 65; // high precision int16 fft
+    float target_db = 60; // high precision int16 fft
     int ram_size_before = heap_caps_get_free_size(MALLOC_CAP_8BIT);
     uint32_t start = 0, end = 0;
     int out_exponent;
@@ -261,7 +261,7 @@ TEST_CASE("7. test dl fft hp s16", "[dl_fft]")
 
         // check snr
         dl_short_to_float(x, nfft * 2, out_exponent, y);
-        TEST_ASSERT_EQUAL(true, check_fft_results(y, output[i], nfft, target_db, 5e-3));
+        TEST_ASSERT_EQUAL(true, check_fft_results(y, output[i], nfft, target_db, 2e-2));
 
         start = esp_timer_get_time();
         for (int k = 0; k < LOOP; k++) {
@@ -285,7 +285,7 @@ TEST_CASE("8. test dl ifft hp s16", "[dl_fft]")
         fft_input_s16_128, fft_input_s16_256, fft_input_s16_512, fft_input_s16_1024, fft_input_s16_2048};
     // const float *output[5] = {fft_input_128, fft_input_256, fft_input_512, fft_input_1024, fft_input_2048};
     int test_nfft[5] = {128, 256, 512, 1024, 2048};
-    float target_db = 65; // high precision int16 fft
+    float target_db = 60; // high precision int16 fft
     int ram_size_before = heap_caps_get_free_size(MALLOC_CAP_8BIT);
     uint32_t start = 0, end = 0;
     int exponent;
@@ -306,7 +306,7 @@ TEST_CASE("8. test dl ifft hp s16", "[dl_fft]")
         // check snr
         dl_short_to_float(x, nfft * 2, out_exponent, y);
         dl_short_to_float(input[i], nfft * 2, -15, z);
-        TEST_ASSERT_EQUAL(true, check_fft_results(y, z, nfft, target_db, 5e-3));
+        TEST_ASSERT_EQUAL(true, check_fft_results(y, z, nfft, target_db, 1e-2));
 
         start = esp_timer_get_time();
         for (int k = 0; k < LOOP; k++) {
diff --git a/tools/dl_fft/base/dl_fft2r_sc16_ansi.c b/tools/dl_fft/base/dl_fft2r_sc16_ansi.c
@@ -8,6 +8,7 @@ static inline int16_t dl_xtfixed_bf_1(
     result -= (int32_t)a1 * (int32_t)a2 + (int32_t)a3 * (int32_t)a4;
     result += add_rount_mult;
     result = result >> result_shift;
+
     return (int16_t)result;
 }
 
@@ -19,6 +20,7 @@ static inline int16_t dl_xtfixed_bf_2(
     result -= ((int32_t)a1 * (int32_t)a2 - (int32_t)a3 * (int32_t)a4);
     result += add_rount_mult;
     result = result >> result_shift;
+
     return (int16_t)result;
 }
 
@@ -30,6 +32,7 @@ static inline int16_t dl_xtfixed_bf_3(
     result += (int32_t)a1 * (int32_t)a2 + (int32_t)a3 * (int32_t)a4;
     result += add_rount_mult;
     result = result >> result_shift;
+
     return (int16_t)result;
 }
 
@@ -41,6 +44,7 @@ static inline int16_t dl_xtfixed_bf_4(
     result += (int32_t)a1 * (int32_t)a2 - (int32_t)a3 * (int32_t)a4;
     result += add_rount_mult;
     result = result >> result_shift;
+
     return (int16_t)result;
 }
 
@@ -192,23 +196,25 @@ esp_err_t dl_fft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift
     uint32_t *w = (uint32_t *)table;
     uint32_t *in_data = (uint32_t *)data;
 
-    int ie, ia, m;
+    int ie, ia, m, loop_num = 2;
     dl_sc16_t cs; // c - re, s - im
     dl_sc16_t m_data;
     dl_sc16_t a_data;
     int add_rount_mult = 1 << 15;
-    bool flag = true;
 
     ie = 1;
     shift[0] = 0;
     for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
         ia = 0;
         int loop_shift = 16;
-        if (flag || N2 == 1) {
-            loop_shift = dl_array_max_q_s16(data, N);
-            flag = false;
+        if (loop_num == 2) {
+            loop_shift = dl_array_max_q_s16(data, N * 2);
+            if (loop_shift < 16) {
+                loop_shift += 1;
+            }
+            loop_num = 0;
         } else {
-            flag = true;
+            loop_num += 1;
         }
         shift[0] += loop_shift - 15;
         add_rount_mult = 1 << (loop_shift - 1);
@@ -273,23 +279,25 @@ esp_err_t dl_ifft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shif
     uint32_t *w = (uint32_t *)table;
     uint32_t *in_data = (uint32_t *)data;
 
-    int ie, ia, m;
+    int ie, ia, m, loop_num = 2;
     dl_sc16_t cs; // c - re, s - im
     dl_sc16_t m_data;
     dl_sc16_t a_data;
     int add_rount_mult = 1 << 15;
-    bool flag = true;
 
     ie = 1;
     shift[0] = 0;
     for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
         ia = 0;
         int loop_shift = 16;
-        if (flag || N2 == 1) {
-            loop_shift = dl_array_max_q_s16(data, N);
-            flag = false;
+        if (loop_num == 2) {
+            loop_shift = dl_array_max_q_s16(data, N * 2);
+            if (loop_shift < 16) {
+                loop_shift += 1;
+            }
+            loop_num = 0;
         } else {
-            flag = true;
+            loop_num += 1;
         }
         shift[0] += loop_shift - 15;
         add_rount_mult = 1 << (loop_shift - 1);
diff --git a/tools/dl_fft/base/dl_fft_base.c b/tools/dl_fft/base/dl_fft_base.c
@@ -32,12 +32,8 @@ int dl_power_of_two(uint32_t n)
 
 float *dl_short_to_float(const int16_t *x, int len, int exponent, float *y)
 {
-    float scale = 1.0;
-    if (exponent > 0) {
-        scale = (1 << exponent);
-    } else if (exponent < 0) {
-        scale = 1.0 / (1 << (-exponent));
-    }
+    float scale = powf(2, exponent);
+    // printf("scale: %f\n", scale);
     for (int i = 0; i < len; i++) {
         y[i] = scale * x[i];
     }
@@ -46,7 +42,7 @@ float *dl_short_to_float(const int16_t *x, int len, int exponent, float *y)
 
 int16_t dl_array_max_q_s16(const int16_t *x, int size)
 {
-    int32_t max = x[0];
+    int16_t max = 0;
     for (int i = 1; i < size; i++) {
         if (x[i] > max) {
             max = x[i];
@@ -59,11 +55,38 @@ int16_t dl_array_max_q_s16(const int16_t *x, int size)
         return 1;
     }
 
-    uint16_t k = 2;
+    int16_t k = 2;
     while (max > 1) {
         k++;
         max = max >> 1;
     }
 
     return k;
 }
+
+int dl_array_max_q_f32(const float *x, int size, float eps)
+{
+    float max = 0;
+    for (int i = 1; i < size; i++) {
+        if (x[i] > max) {
+            max = x[i];
+        } else if (-x[i] > max) {
+            max = -x[i];
+        }
+    }
+    int max_int = ceilf(max + eps);
+
+    return dl_power_of_two(max_int);
+}
+
+int dl_float_to_short(const float *x, int len, int16_t *y, int out_exponent)
+{
+    int exponent = out_exponent - dl_array_max_q_f32(x, len, 1e-8);
+    float scale = powf(2, exponent);
+
+    for (int i = 0; i < len; i++) {
+        y[i] = (int16_t)roundf(x[i] * scale);
+    }
+
+    return -exponent;
+}
diff --git a/tools/dl_fft/base/dl_fft_base.h b/tools/dl_fft/base/dl_fft_base.h
@@ -19,6 +19,7 @@ bool dl_is_power_of_two(int x);
 int dl_power_of_two(uint32_t n);
 float *dl_short_to_float(const int16_t *x, int len, int exponent, float *y);
 int16_t dl_array_max_q_s16(const int16_t *x, int size);
+int dl_float_to_short(const float *x, int len, int16_t *y, int out_exponent);
 
 // float fftr2
 float *dl_gen_fftr2_table_f32(int fft_point, uint32_t caps);
diff --git a/tools/dl_fft/idf_component.yml b/tools/dl_fft/idf_component.yml
@@ -1,6 +1,6 @@
-version: "0.1.0"
+version: "0.2.0"
 license: "MIT"
 description: dl_fft is a lightweight and efficient fft library for all espressif chips.
 url: https://github.com/espressif/esp-dl/tree/master/esp-dl/tools/dl_fft
 dependencies: 
-  idf: ">=5.0"
+  idf: ">=5.0"