avcodec/jpeg2000dec: Fix FF_DWT97_INT to pass the conformance testing defined in ISO/IEC 15444-4

osamu620 · osamu620 · commit bc8aeee159ef · 2024-09-05T15:53:12.000+09:00
This commit fixes the problem described below in the integer version of the inverse 9-7 DWT processing (FF_DWT97_INT, https://trac.ffmpeg.org/ticket/10123), which is activated with `-flags +bitexact`. - Problem - The tests for the following codestreams failed with `-flags +bitexact`. - p0_04.j2k, p0_05.j2k, p0_09.j2k, p1_02.j2k, p1_03.j2k, p1_06.j2k. - ds0_ht_04_b11.j2k, ds0_ht_04_b12.j2k, ds0_ht_05_b11.j2k, ds0_ht_05_b12.j2k, ds0_ht_09_b11.j2k, ds1_ht_02_b11.j2k, ds1_ht_02_b12.j2k, ds1_ht_03_b11.j2k, ds1_ht_03_b12.j2k, ds1_ht_06_b11.j2k. - These failures come from the insufficient number of fraction bits in the fixed-point implementation of the 9-7 DWT.
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
@@ -1384,7 +1384,9 @@ static void truncpasses(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile)
                     Jpeg2000Band *band = reslevel->band + bandno;
                     Jpeg2000Prec *prec = band->prec + precno;
 
-                    int64_t dwt_norm = dwt_norms[codsty->transform == FF_DWT53][bandpos][lev] * (int64_t)band->i_stepsize >> 15;
+                    // Shifting down to 1 bit above from the binary point.
+                    // This is mandatory for FF_DWT97_INT to maintain its precision.
+                    int64_t dwt_norm = dwt_norms[codsty->transform == FF_DWT53][bandpos][lev] * (int64_t)band->i_stepsize >> 14;
                     int64_t lambda_prime = av_rescale(s->lambda, 1 << WMSEDEC_SHIFT, dwt_norm * dwt_norm);
                     for (cblkno = 0; cblkno < prec->nb_codeblocks_height * prec->nb_codeblocks_width; cblkno++){
                         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
@@ -1457,7 +1459,10 @@ static int encode_tile(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile, int tileno
                                 int *ptr = t1.data + (y-yy0)*t1.stride;
                                 for (x = xx0; x < xx1; x++){
                                     *ptr = (comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]);
-                                    *ptr = (int64_t)*ptr * (int64_t)(16384 * 65536 / band->i_stepsize) >> 15 - NMSEDEC_FRACBITS;
+
+                                    // Shifting down to 1 bit above from the binary point.
+                                    // This is mandatory for FF_DWT97_INT to maintain its precision.
+                                    *ptr = (int64_t)*ptr * (int64_t)(16384 * 65536 / band->i_stepsize) >> 14 - NMSEDEC_FRACBITS;
                                     ptr++;
                                 }
                             }
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
@@ -260,17 +260,16 @@ static void init_band_stepsize(AVCodecContext *avctx,
                 band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4;
                 break;
         }
-        if (codsty->transform == FF_DWT97) {
-            band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
-        }
+        // scaling
+        band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
     }
 
     if (band->f_stepsize > (INT_MAX >> 15)) {
         band->f_stepsize = 0;
         av_log(avctx, AV_LOG_ERROR, "stepsize out of range\n");
     }
 
-    band->i_stepsize = band->f_stepsize * (1 << 15);
+    band->i_stepsize = lrint(band->f_stepsize * (1 << 15) + 0.5f);
 
     /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
      * If not set output of entropic decoder is not correct. */
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
@@ -2136,7 +2136,9 @@ static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk,
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
         int *src = t1->data + j*t1->stride;
         for (i = 0; i < w; ++i)
-            datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16;
+            // Shifting down to 1 bit above from the binary point.
+            // This is mandatory for FF_DWT97_INT to pass the conformance testing.
+            datap[i] = (int32_t)(src[i] * (int64_t)band->i_stepsize + (1 << 14)) >> 15;
     }
 }
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
@@ -39,7 +39,7 @@
 
 /* Lifting parameters in integer format.
  * Computed as param = (float param) * (1 << 16) */
-#define I_LFTG_ALPHA  103949ll
+#define I_LFTG_ALPHA   38413ll // = 103949 - 65536, (= 1.586 - 1.0)
 #define I_LFTG_BETA     3472ll
 #define I_LFTG_GAMMA   57862ll
 #define I_LFTG_DELTA   29066ll
@@ -234,8 +234,11 @@ static void sd_1d97_int(int *p, int i0, int i1)
     extend97_int(p, i0, i1);
     i0++; i1++;
 
-    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++)
-        p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++) {
+        int64_t sum = p[2 * i] + p[2 * i + 2];
+        p[2 * i + 1] -= sum;
+        p[2 * i + 1] -= (I_LFTG_ALPHA * sum + (1 << 15)) >> 16;
+    }
     for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++)
         p[2 * i]     -= (I_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
     for (i = (i0>>1) - 1; i < (i1>>1); i++)
@@ -276,7 +279,7 @@ static void dwt_encode97_int(DWTContext *s, int *t)
 
             // copy back and deinterleave
             for (i =   mv; i < lv; i+=2, j++)
-                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+                t[w*j + lp] = l[i];
             for (i = 1-mv; i < lv; i+=2, j++)
                 t[w*j + lp] = l[i];
         }
@@ -293,15 +296,18 @@ static void dwt_encode97_int(DWTContext *s, int *t)
 
             // copy back and deinterleave
             for (i =   mh; i < lh; i+=2, j++)
-                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+                t[w*lp + j] = l[i];
             for (i = 1-mh; i < lh; i+=2, j++)
                 t[w*lp + j] = l[i];
         }
 
     }
 
     for (i = 0; i < w * h; i++)
-        t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+        // Shifting down to the binary point.
+        // In FF_DWT97_INT, the binary point of the input coefficients is 1 bit above from the LSB.
+        // So, we need `>> (I_PRESHIFT + 1)`  here.
+        t[i] = (t[i] + ((1<<(I_PRESHIFT + 1))>>1)) >> (I_PRESHIFT + 1);
 }
 
 static void sr_1d53(unsigned *p, int i0, int i1)
@@ -471,8 +477,11 @@ static void sr_1d97_int(int32_t *p, int i0, int i1)
     for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
         p[2 * i]     += (I_LFTG_BETA  * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16;
     /* step 6 */
-    for (i = (i0 >> 1); i < (i1 >> 1); i++)
-        p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = (i0 >> 1); i < (i1 >> 1); i++) {
+        int64_t sum = p[2 * i] + (int64_t) p[2 * i + 2];
+        p[2 * i + 1] += sum;
+        p[2 * i + 1] += (I_LFTG_ALPHA * sum + (1 << 15)) >> 16;
+    }
 }
 
 static void dwt_decode97_int(DWTContext *s, int32_t *t)
@@ -500,9 +509,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
         l = line + mh;
         for (lp = 0; lp < lv; lp++) {
             int i, j = 0;
-            // rescale with interleaving
+            // interleaving
             for (i = mh; i < lh; i += 2, j++)
-                l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16;
+                l[i] = data[w * lp + j];
             for (i = 1 - mh; i < lh; i += 2, j++)
                 l[i] = data[w * lp + j];
 
@@ -516,9 +525,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
         l = line + mv;
         for (lp = 0; lp < lh; lp++) {
             int i, j = 0;
-            // rescale with interleaving
+            // interleaving
             for (i = mv; i < lv; i += 2, j++)
-                l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16;
+                l[i] = data[w * j + lp];
             for (i = 1 - mv; i < lv; i += 2, j++)
                 l[i] = data[w * j + lp];
 
@@ -530,7 +539,10 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
     }
 
     for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+        // Shifting down to the binary point.
+        // In FF_DWT97_INT, the binary point of the input coefficients is 1 bit above from the LSB.
+        // So, we need `>> (I_PRESHIFT + 1)`  here.
+        data[i] = (int32_t)(data[i] + ((1LL<<(I_PRESHIFT + 1))>>1)) >> (I_PRESHIFT + 1);
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],

Original file line number	Diff line number	Diff line change
`@@ -260,17 +260,16 @@ static void init_band_stepsize(AVCodecContext *avctx,`
`260`	`260`	`band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4;`
`261`	`261`	`break;`
`262`	`262`	`}`
`263`		`- if (codsty->transform == FF_DWT97) {`
`264`		`- band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);`
`265`		`- }`
	`263`	`+ // scaling`
	`264`	`+ band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);`
`266`	`265`	`}`
`267`	`266`
`268`	`267`	`if (band->f_stepsize > (INT_MAX >> 15)) {`
`269`	`268`	`band->f_stepsize = 0;`
`270`	`269`	`av_log(avctx, AV_LOG_ERROR, "stepsize out of range\n");`
`271`	`270`	`}`
`272`	`271`
`273`		`- band->i_stepsize = band->f_stepsize * (1 << 15);`
	`272`	`+ band->i_stepsize = lrint(band->f_stepsize * (1 << 15) + 0.5f);`
`274`	`273`
`275`	`274`	`/* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?`
`276`	`275`	`* If not set output of entropic decoder is not correct. */`
Original file line number	Diff line number	Diff line change
`@@ -2136,7 +2136,9 @@ static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk,`
`2136`	`2136`	`int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];`
`2137`	`2137`	`int *src = t1->data + j*t1->stride;`
`2138`	`2138`	`for (i = 0; i < w; ++i)`
`2139`		`- datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16;`
	`2139`	`+ // Shifting down to 1 bit above from the binary point.`
	`2140`	`+ // This is mandatory for FF_DWT97_INT to pass the conformance testing.`
	`2141`	`+ datap[i] = (int32_t)(src[i] * (int64_t)band->i_stepsize + (1 << 14)) >> 15;`
`2140`	`2142`	`}`
`2141`	`2143`	`}`
`2142`	`2144`