@@ -309,19 +309,18 @@ void DecodeMCUs(byte[] src, SimpleBitmap bmp) {
309309 for ( int y = 0 ; y < BLOCK_SAMPLES ; y ++ )
310310 for ( int x = 0 ; x < BLOCK_SAMPLES ; x ++ )
311311 {
312+ float sample = output [ y * BLOCK_SAMPLES + x ] ;
313+
312314 for ( int py = 0 ; py < samplesY ; py ++ )
313315 for ( int px = 0 ; px < samplesX ; px ++ )
314316 {
315317 int YY = ( by * BLOCK_SAMPLES + y ) * samplesY + py ;
316318 int XX = ( bx * BLOCK_SAMPLES + x ) * samplesX + px ;
317319 int idx = YY * mcu_w + XX ;
318320
319- if ( i == 0 )
320- colors [ idx ] . Y = output [ y * BLOCK_SAMPLES + x ] ;
321- else if ( i == 1 )
322- colors [ idx ] . Cb = output [ y * BLOCK_SAMPLES + x ] ;
323- else if ( i == 2 )
324- colors [ idx ] . Cr = output [ y * BLOCK_SAMPLES + x ] ;
321+ if ( i == 0 ) colors [ idx ] . Y = sample ;
322+ else if ( i == 1 ) colors [ idx ] . Cb = sample ;
323+ else if ( i == 2 ) colors [ idx ] . Cr = sample ;
325324 }
326325 }
327326 }
@@ -401,7 +400,7 @@ void DecodeBlock(JpegComponent comp, byte[] src, int[] block, float[] output) {
401400
402401 float [ ] idct_factors ;
// Diff hunk for ComputeIDCTFactors: fills a local cosine table and publishes it via idct_factors.
// The added (+) side allocates 64 floats and stores each entry at xy * 8 + uv (one slot per
// (xy, uv) pair); the removed (-) side allocated 128 floats and stored at (2 * xy + 1) * uv.
403402 void ComputeIDCTFactors ( ) {
404- float [ ] factors = new float [ 128 ] ;
403+ float [ ] factors = new float [ 64 ] ;
405404
406405 for ( int xy = 0 ; xy < 8 ; xy ++ )
407406 {
// NOTE(review): the statement that declares and iterates uv sits between the two hunks and is
// not visible here; presumably uv runs over 0..7 -- confirm against the full file.
@@ -410,29 +409,40 @@ void ComputeIDCTFactors() {
// cuv is 0.70710678 (approximately 1/sqrt(2)) when uv == 0 and 1.0 otherwise.
410409 float cuv = uv == 0 ? 0.70710678f : 1.0f ;
// The cosine is evaluated through Math.Cos on a double argument, then narrowed to float.
411410 float cosuv = ( float ) Math . Cos ( ( 2 * xy + 1 ) * uv * Math . PI / 16.0 ) ;
412411
413- factors [ ( 2 * xy + 1 ) * uv ] = cuv * cosuv ;
412+ factors [ ( xy * 8 ) + uv ] = cuv * cosuv ;
414413 }
415414 }
// Assign the field only after the table has been filled.
416415 idct_factors = factors ;
417416 }
418417
// Diff hunk for IDCT: converts the coefficients in block into samples written to output,
// using the table in idct_factors.
// Removed (-) side: for every output sample (y, x) it summed over all v and u in 0..7,
// multiplying two table entries with block [ v * 8 + u ].
// Added (+) side: two passes through a scratch buffer tmp; each inner loop sums
// BLOCK_SAMPLES terms. Both sides finish with ( sum / 4.0f ) + 128.0f.
// NOTE(review): the added side bounds its loops with BLOCK_SAMPLES but indexes with a
// literal stride of 8; these agree only if BLOCK_SAMPLES is 8 -- confirm.
419- void IDCT ( int [ ] block , float [ ] output ) {
420- float [ ] factors = idct_factors ;
// The added signature is marked unsafe; tmp below is a stackalloc'd float pointer.
418+ unsafe void IDCT ( int [ ] block , float [ ] output ) {
419+ float [ ] factors = idct_factors ;
420+ float * tmp = stackalloc float [ BLOCK_SAMPLES * BLOCK_SAMPLES ] ;
421421
422- for ( int y = 0 ; y < 8 ; y ++ )
423- for ( int x = 0 ; x < 8 ; x ++ )
// Pass 1 (added): for each column col and each y, combine block [ v * 8 + col ] over v
// with table row y, and store the result in tmp [ y * 8 + col ].
422+ for ( int col = 0 ; col < BLOCK_SAMPLES ; col ++ )
424423 {
425- float sum = 0.0f ;
426- for ( int v = 0 ; v < 8 ; v ++ )
427- for ( int u = 0 ; u < 8 ; u ++ )
424+ for ( int y = 0 ; y < BLOCK_SAMPLES ; y ++ )
428425 {
429- float suv = block [ v * 8 + u ] ;
430- float cu_cosu = factors [ ( 2 * x + 1 ) * u ] ;
431- float cv_cosv = factors [ ( 2 * y + 1 ) * v ] ;
432-
433- sum += cu_cosu * cv_cosv * suv ;
426+ float sum = 0.0f ;
427+ for ( int v = 0 ; v < BLOCK_SAMPLES ; v ++ )
428+ {
429+ sum += block [ v * 8 + col ] * factors [ ( y * 8 ) + v ] ;
430+ }
431+ tmp [ y * 8 + col ] = sum ;
432+ }
433+ }
434+
// Pass 2 (added): for each row and each x, combine tmp [ row * 8 + u ] over u with
// table row x, then scale by 1/4 and add 128 before writing output [ row * 8 + x ].
435+ for ( int row = 0 ; row < BLOCK_SAMPLES ; row ++ )
436+ {
437+ for ( int x = 0 ; x < BLOCK_SAMPLES ; x ++ )
438+ {
439+ float sum = 0.0f ;
440+ for ( int u = 0 ; u < BLOCK_SAMPLES ; u ++ )
441+ {
442+ sum += tmp [ row * 8 + u ] * factors [ ( x * 8 ) + u ] ;
443+ }
444+ output [ row * 8 + x ] = ( sum / 4.0f ) + 128.0f ; // undo level shift at end
434445 }
435- output [ y * 8 + x ] = ( sum / 4.0f ) + 128.0f ; // undo level shift at end
436446 }
437447 }
438448
0 commit comments