@@ -271,6 +271,9 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
271271 int i ;
272272 int N , N2 , N4 ;
273273 const kiss_twiddle_scalar * trig ;
274+ #ifdef FIXED_POINT
275+ int pre_shift , post_shift , fft_shift ;
276+ #endif
274277 (void ) arch ;
275278
276279 N = l -> n ;
@@ -283,6 +286,21 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
283286 N2 = N >>1 ;
284287 N4 = N >>2 ;
285288
289+ #ifdef FIXED_POINT
290+ {
291+ opus_val32 sumval = N2 ;
292+ opus_val32 maxval = 0 ;
293+ for (i = 0 ;i < N2 ;i ++ ) {
294+ maxval = MAX32 (maxval , ABS32 (in [i * stride ]));
295+ sumval = ADD32_ovflw (sumval , ABS32 (SHR32 (in [i * stride ],4 )));
296+ }
297+ pre_shift = IMAX (0 , 29 - celt_ilog2 (1 + SHR32 (maxval ,2 )* 3 ));
298+ /* Worst-case where all the energy goes to a single sample. */
299+ post_shift = IMAX (0 , 26 - celt_ilog2 (ABS32 (sumval )));
300+ post_shift = IMIN (post_shift , pre_shift );
301+ fft_shift = pre_shift - post_shift ;
302+ }
303+ #endif
286304 /* Pre-rotate */
287305 {
288306 /* Temp pointers to make it really clear to the compiler what we're doing */
@@ -297,8 +315,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
297315 kiss_fft_scalar yr , yi ;
298316 opus_val32 x1 , x2 ;
299317 rev = * bitrev ++ ;
300- x1 = SHL32_ovflw (* xp1 , IMDCT_HEADROOM );
301- x2 = SHL32_ovflw (* xp2 , IMDCT_HEADROOM );
318+ x1 = SHL32_ovflw (* xp1 , pre_shift );
319+ x2 = SHL32_ovflw (* xp2 , pre_shift );
302320 yr = ADD32_ovflw (S_MUL (x2 , t [i ]), S_MUL (x1 , t [N4 + i ]));
303321 yi = SUB32_ovflw (S_MUL (x1 , t [i ]), S_MUL (x2 , t [N4 + i ]));
304322 /* We swap real and imag because we use an FFT instead of an IFFT. */
@@ -310,7 +328,7 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
310328 }
311329 }
312330
313- opus_fft_impl (l -> kfft [shift ], (kiss_fft_cpx * )(out + (overlap >>1 )) ARG_FIXED (0 ));
331+ opus_fft_impl (l -> kfft [shift ], (kiss_fft_cpx * )(out + (overlap >>1 )) ARG_FIXED (fft_shift ));
314332
315333 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
316334 it in-place. */
@@ -330,8 +348,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
330348 t0 = t [i ];
331349 t1 = t [N4 + i ];
332350 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
333- yr = PSHR32_ovflw (ADD32_ovflw (S_MUL (re ,t0 ), S_MUL (im ,t1 )), IMDCT_HEADROOM );
334- yi = PSHR32_ovflw (SUB32_ovflw (S_MUL (re ,t1 ), S_MUL (im ,t0 )), IMDCT_HEADROOM );
351+ yr = PSHR32_ovflw (ADD32_ovflw (S_MUL (re ,t0 ), S_MUL (im ,t1 )), post_shift );
352+ yi = PSHR32_ovflw (SUB32_ovflw (S_MUL (re ,t1 ), S_MUL (im ,t0 )), post_shift );
335353 /* We swap real and imag because we're using an FFT instead of an IFFT. */
336354 re = yp1 [1 ];
337355 im = yp1 [0 ];
@@ -341,8 +359,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
341359 t0 = t [(N4 - i - 1 )];
342360 t1 = t [(N2 - i - 1 )];
343361 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
344- yr = PSHR32_ovflw (ADD32_ovflw (S_MUL (re ,t0 ), S_MUL (im ,t1 )), IMDCT_HEADROOM );
345- yi = PSHR32_ovflw (SUB32_ovflw (S_MUL (re ,t1 ), S_MUL (im ,t0 )), IMDCT_HEADROOM );
362+ yr = PSHR32_ovflw (ADD32_ovflw (S_MUL (re ,t0 ), S_MUL (im ,t1 )), post_shift );
363+ yi = PSHR32_ovflw (SUB32_ovflw (S_MUL (re ,t1 ), S_MUL (im ,t0 )), post_shift );
346364 yp1 [0 ] = yr ;
347365 yp0 [1 ] = yi ;
348366 yp0 += 2 ;
0 commit comments