@@ -43,7 +43,6 @@ public override SimpleBitmap Decode(byte[] src) {
4343 SetBuffer ( src ) ;
4444 SimpleBitmap bmp = new SimpleBitmap ( ) ;
4545
46- ComputeIDCTFactors ( ) ;
4746 ReadMarkers ( src , bmp ) ;
4847 return bmp ;
4948 }
@@ -316,11 +315,6 @@ void DecodeMCUs(byte[] src, SimpleBitmap bmp) {
316315 float * output = stackalloc float [ BLOCK_SIZE ] ;
317316
318317 YCbCr [ ] colors = new YCbCr [ mcu_w * mcu_h ] ;
319- for ( int i = 0 ; i < colors . Length ; i ++ )
320- {
321- colors [ i ] . Cr = 128 ;
322- colors [ i ] . Cb = 128 ;
323- }
324318
325319 for ( int mcuY = 0 ; mcuY < mcus_y ; mcuY ++ )
326320 for ( int mcuX = 0 ; mcuX < mcus_x ; mcuX ++ )
@@ -379,13 +373,19 @@ void DecodeMCUs(byte[] src, SimpleBitmap bmp) {
379373 int globalY = baseY + YY ;
380374
381375 if ( globalX < bmp . Width && globalY < bmp . Height ) {
382- float y = colors [ j ] . Y ;
376+ // Following standard code assumes JPEG DCT output is level shifted
377+ // float r = 1.40200f * (cr - 128) + y;
378+ // float g = -0.34414f * (cb - 128) - 0.71414f * (cr - 128) + y;
379+ // float b = 1.77200f * (cb - 128) + y;
380+ // Use a slightly different algorithm to avoid level shifting
381+
382+ float y = colors [ j ] . Y + 128.0f ;
383383 float cr = colors [ j ] . Cr ;
384384 float cb = colors [ j ] . Cb ;
385385
386- float r = 1.40200f * ( cr - 128 ) + y ;
387- float g = - 0.34414f * ( cb - 128 ) - 0.71414f * ( cr - 128 ) + y ;
388- float b = 1.77200f * ( cb - 128 ) + y ;
386+ float r = 1.40200f * cr + y ;
387+ float g = - 0.34414f * cb - 0.71414f * cr + y ;
388+ float b = 1.77200f * cb + y ;
389389
390390 Pixel p = new Pixel ( ByteClamp ( r ) , ByteClamp ( g ) , ByteClamp ( b ) , 255 ) ;
391391 bmp . pixels [ globalY * bmp . Width + globalX ] = p ;
@@ -437,52 +437,93 @@ void DecodeBlock(JpegComponent comp, byte[] src, int[] block) {
437437 }
438438 } while ( idx < 64 ) ;
439439 }
440-
441- float [ ] idct_factors ;
442- void ComputeIDCTFactors ( ) {
443- float [ ] factors = new float [ 64 ] ;
444-
445- for ( int xy = 0 ; xy < 8 ; xy ++ )
446- {
447- for ( int uv = 0 ; uv < 8 ; uv ++ )
448- {
449- float cuv = uv == 0 ? 0.70710678f : 1.0f ;
450- float cosuv = ( float ) Math . Cos ( ( 2 * xy + 1 ) * uv * Math . PI / 16.0 ) ;
451-
452- factors [ ( xy * 8 ) + uv ] = cuv * cosuv ;
453- }
454- }
455- idct_factors = factors ;
456- }
457-
458- void IDCT ( int [ ] block , float * output ) {
459- float [ ] factors = idct_factors ;
460- float * tmp = stackalloc float [ BLOCK_SAMPLES * BLOCK_SAMPLES ] ;
461-
462- for ( int col = 0 ; col < BLOCK_SAMPLES ; col ++ )
440+
441+ const float A1 = 0.98078528040f ; // cos(1pi/16)
442+ const float A2 = 0.92387953251f ; // cos(2pi/16)
443+ const float A3 = 0.83146961230f ; // cos(3pi/16)
444+ const float A4 = 0.70710678118f ; // cos(4pi/16)
445+ const float A5 = 0.55557023302f ; // cos(5pi/16)
446+ const float A6 = 0.38268343236f ; // cos(6pi/16)
447+ const float A7 = 0.19509032201f ; // cos(7pi/16)
448+
449+ const float C1 = 0.98078528040f / 4.0f ; // cos(1pi/16)
450+ const float C2 = 0.92387953251f / 4.0f ; // cos(2pi/16)
451+ const float C3 = 0.83146961230f / 4.0f ; // cos(3pi/16)
452+ const float C4 = 0.70710678118f / 4.0f ; // cos(4pi/16)
453+ const float C5 = 0.55557023302f / 4.0f ; // cos(5pi/16)
454+ const float C6 = 0.38268343236f / 4.0f ; // cos(6pi/16)
455+ const float C7 = 0.19509032201f / 4.0f ; // cos(7pi/16)
456+
457+ const int DCT_SIZE = 8 ;
458+ static void IDCT ( int [ ] block , float * output ) {
459+ float * tmp = stackalloc float [ DCT_SIZE * DCT_SIZE ] ;
460+
461+ for ( int col = 0 ; col < DCT_SIZE ; col ++ )
463462 {
464- for ( int y = 0 ; y < BLOCK_SAMPLES ; y ++ )
465- {
466- float sum = 0.0f ;
467- for ( int v = 0 ; v < BLOCK_SAMPLES ; v ++ )
468- {
469- sum += block [ v * 8 + col ] * factors [ ( y * 8 ) + v ] ;
470- }
471- tmp [ y * 8 + col ] = sum ;
472- }
463+ float B0 = block [ 0 * DCT_SIZE + col ] , B1 = block [ 1 * DCT_SIZE + col ] ;
464+ float B2 = block [ 2 * DCT_SIZE + col ] , B3 = block [ 3 * DCT_SIZE + col ] ;
465+ float B4 = block [ 4 * DCT_SIZE + col ] , B5 = block [ 5 * DCT_SIZE + col ] ;
466+ float B6 = block [ 6 * DCT_SIZE + col ] , B7 = block [ 7 * DCT_SIZE + col ] ;
467+
468+ /* Phase 1 */ float a4 = A4 * B0 ;
469+ /* Phase 5 */ float e4 = A4 * B4 ;
470+ /* Phase 3 */ float c2 = A2 * B2 , c6 = A6 * B2 ;
471+ /* Phase 7 */ float g2 = A2 * B6 , g6 = A6 * B6 ;
472+ /* Phase 2 */ float b1 = A1 * B1 , b3 = A3 * B1 , b5 = A5 * B1 , b7 = A7 * B1 ;
473+ /* Phase 4 */ float d1 = A1 * B3 , d3 = A3 * B3 , d5 = A5 * B3 , d7 = A7 * B3 ;
474+ /* Phase 6 */ float f1 = A1 * B5 , f3 = A3 * B5 , f5 = A5 * B5 , f7 = A7 * B5 ;
475+ /* Phase 8 */ float h1 = A1 * B7 , h3 = A3 * B7 , h5 = A5 * B7 , h7 = A7 * B7 ;
476+
477+ /* Phase 1+5 */ float w1 = a4 + e4 , w2 = a4 - e4 ;
478+ /* Phase 3+7 */ float x1 = c2 + g6 , x2 = c6 - g2 ;
479+ /* Phase 2+6 */ float y1 = b1 + d3 , y2 = b3 - d7 , y3 = b5 - d1 , y4 = b7 - d5 ;
480+ /* Phase 4+8 */ float z1 = f5 + h7 , z2 = f1 + h5 , z3 = f7 + h3 , z4 = f3 - h1 ;
481+
482+ /* Phase 1+3+5+7 */ float u1 = w1 + x1 , u2 = w2 + x2 , u3 = w2 - x2 , u4 = w1 - x1 ;
483+ /* Phase 2+6+4+8 */ float v1 = y1 + z1 , v2 = y2 - z2 , v3 = y3 + z3 , v4 = y4 + z4 ;
484+
485+ tmp [ 0 * DCT_SIZE + col ] = u1 + v1 ;
486+ tmp [ 1 * DCT_SIZE + col ] = u2 + v2 ;
487+ tmp [ 2 * DCT_SIZE + col ] = u3 + v3 ;
488+ tmp [ 3 * DCT_SIZE + col ] = u4 + v4 ;
489+ tmp [ 4 * DCT_SIZE + col ] = u4 - v4 ;
490+ tmp [ 5 * DCT_SIZE + col ] = u3 - v3 ;
491+ tmp [ 6 * DCT_SIZE + col ] = u2 - v2 ;
492+ tmp [ 7 * DCT_SIZE + col ] = u1 - v1 ;
473493 }
474494
475- for ( int row = 0 ; row < BLOCK_SAMPLES ; row ++ )
495+ for ( int row = 0 ; row < DCT_SIZE ; row ++ )
476496 {
477- for ( int x = 0 ; x < BLOCK_SAMPLES ; x ++ )
478- {
479- float sum = 0.0f ;
480- for ( int u = 0 ; u < BLOCK_SAMPLES ; u ++ )
481- {
482- sum += tmp [ row * 8 + u ] * factors [ ( x * 8 ) + u ] ;
483- }
484- output [ row * 8 + x ] = ( sum / 4.0f ) + 128.0f ; // undo level shift at end
485- }
497+ float B0 = tmp [ row * DCT_SIZE + 0 ] , B1 = tmp [ row * DCT_SIZE + 1 ] ;
498+ float B2 = tmp [ row * DCT_SIZE + 2 ] , B3 = tmp [ row * DCT_SIZE + 3 ] ;
499+ float B4 = tmp [ row * DCT_SIZE + 4 ] , B5 = tmp [ row * DCT_SIZE + 5 ] ;
500+ float B6 = tmp [ row * DCT_SIZE + 6 ] , B7 = tmp [ row * DCT_SIZE + 7 ] ;
501+
502+ /* Phase 1 */ float a4 = C4 * B0 ;
503+ /* Phase 5 */ float e4 = C4 * B4 ;
504+ /* Phase 3 */ float c2 = C2 * B2 , c6 = C6 * B2 ;
505+ /* Phase 7 */ float g2 = C2 * B6 , g6 = C6 * B6 ;
506+ /* Phase 2 */ float b1 = C1 * B1 , b3 = C3 * B1 , b5 = C5 * B1 , b7 = C7 * B1 ;
507+ /* Phase 4 */ float d1 = C1 * B3 , d3 = C3 * B3 , d5 = C5 * B3 , d7 = C7 * B3 ;
508+ /* Phase 6 */ float f1 = C1 * B5 , f3 = C3 * B5 , f5 = C5 * B5 , f7 = C7 * B5 ;
509+ /* Phase 8 */ float h1 = C1 * B7 , h3 = C3 * B7 , h5 = C5 * B7 , h7 = C7 * B7 ;
510+
511+ /* Phase 1+5 */ float w1 = a4 + e4 , w2 = a4 - e4 ;
512+ /* Phase 3+7 */ float x1 = c2 + g6 , x2 = c6 - g2 ;
513+ /* Phase 2+6 */ float y1 = b1 + d3 , y2 = b3 - d7 , y3 = b5 - d1 , y4 = b7 - d5 ;
514+ /* Phase 4+8 */ float z1 = f5 + h7 , z2 = f1 + h5 , z3 = f7 + h3 , z4 = f3 - h1 ;
515+
516+ /* Phase 1+3+5+7 */ float u1 = w1 + x1 , u2 = w2 + x2 , u3 = w2 - x2 , u4 = w1 - x1 ;
517+ /* Phase 2+6+4+8 */ float v1 = y1 + z1 , v2 = y2 - z2 , v3 = y3 + z3 , v4 = y4 + z4 ;
518+
519+ output [ row * DCT_SIZE + 0 ] = u1 + v1 ;
520+ output [ row * DCT_SIZE + 1 ] = u2 + v2 ;
521+ output [ row * DCT_SIZE + 2 ] = u3 + v3 ;
522+ output [ row * DCT_SIZE + 3 ] = u4 + v4 ;
523+ output [ row * DCT_SIZE + 4 ] = u4 - v4 ;
524+ output [ row * DCT_SIZE + 5 ] = u3 - v3 ;
525+ output [ row * DCT_SIZE + 6 ] = u2 - v2 ;
526+ output [ row * DCT_SIZE + 7 ] = u1 - v1 ;
486527 }
487528 }
488529
@@ -548,7 +589,7 @@ byte ReadHuffman(HuffmanTable table, byte[] src) {
548589 return ( byte ) packed ;
549590 }
550591
551- ConsumeBits ( HUFF_FAST_BITS ) ;
592+ ConsumeBits ( HUFF_FAST_BITS ) ;
552593 for ( int i = HUFF_FAST_BITS ; i < HUFF_MAX_BITS ; i ++ )
553594 {
554595 codeword <<= 1 ;
0 commit comments