AcademySoftwareFoundation
diff --git a/‎README.md‎
Lines changed: 12 additions & 0 deletions b/‎README.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎app/oapv_app_enc.c‎
Lines changed: 3 additions & 3 deletions b/‎app/oapv_app_enc.c‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎readme/empty.txt‎
Lines changed: 0 additions & 1 deletion b/‎readme/empty.txt‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎readme/img/apv_parser_on_imhex.png‎
164 KB b/‎readme/img/apv_parser_on_imhex.png‎
164 KB
diff --git a/‎src/avx/oapv_tq_avx.c‎
Lines changed: 16 additions & 16 deletions b/‎src/avx/oapv_tq_avx.c‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎src/oapv.c‎
Lines changed: 1 addition & 1 deletion b/‎src/oapv.c‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/oapv_tq.c‎
Lines changed: 5 additions & 0 deletions b/‎src/oapv_tq.c‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎test/README.md‎
Lines changed: 9 additions & 9 deletions b/‎test/README.md‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎test/bitstream/qp_B.apv‎
0 Bytes b/‎test/bitstream/qp_B.apv‎
0 Bytes
diff --git a/‎test/bitstream/qp_D.apv‎
0 Bytes b/‎test/bitstream/qp_D.apv‎
0 Bytes
@@ -97,6 +97,18 @@ Decoding:
 
     oapv_app_dec -i encoded.apv -o output.y4m
 
+## Utility
+
+### Graphical APV bitstream parser
+
+A pattern file for parsing APV bitstreams with [ImHex](https://github.com/WerWolv/ImHex) is provided [here](/util/apv.hexpat).
+1. Install the [ImHex](https://github.com/WerWolv/ImHex) application
+2. Download the [APV pattern file](/util/apv.hexpat)
+3. Open an APV bitstream (\*.apv file) with ImHex
+4. Import the APV pattern file in the Pattern editor view of ImHex and apply it
+
+![APV_on_ImHex](/readme/img/apv_parser_on_imhex.png)
+
 ## Testing
 
 In build directory run ``ctest``
 
@@ -499,9 +499,9 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
     param->csp = vars->input_csp;
 
     /* update level idc */
-    double tmp_level = 0;
-    sscanf(vars->level, "%lf", &tmp_level);
-    param->level_idc = tmp_level * 30;
+    float tmp_level = 0;
+    sscanf(vars->level, "%f", &tmp_level);
+    param->level_idc = (int)((tmp_level * 30.0) + 0.5);
     /* update band idc */
     param->band_idc = vars->band;
 
 
@@ -519,6 +519,8 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
     shift = QUANT_SHIFT + tr_shift + (qp / 6);
     offset = (s64)deadzone_offset << (shift - 9);
     __m256i offset_vector = _mm256_set1_epi64x(offset);
+    __m256i reg_minval_int16 = _mm256_set1_epi32(-32768);
+    __m256i reg_maxval_int16 = _mm256_set1_epi32(32767);
 
     int pixels = (1 << (log2_w + log2_h));
     int i;
@@ -533,16 +535,16 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
         0, 1, 4, 5, 8, 9, 12, 13,
         -128, -128, -128, -128, -128, -128, -128, -128,
         -128, -128, -128, -128, -128, -128, -128, -128);
-    
+
     for (i = 0; i < pixels; i += 8)
     {
         // Load first row
         __m256i quant_matrix = _mm256_lddqu_si256((__m256i*)(q_matrix + i));
         __m128i coef_row = _mm_lddqu_si128((__m128i*)(coef + i));
 
         // Extract sign
-        __m256i coef_row_cast = _mm256_castsi128_si256(coef_row);
-        __m256i sign_mask = _mm256_srai_epi16(coef_row_cast, 15);
+        __m128i sign_mask = _mm_srai_epi16(coef_row, 15);
+        __m256i sign_mask_ext = _mm256_cvtepi16_epi32(sign_mask);
 
         // Convert to 32 bits and take abs()
         __m256i coef_row_ext = _mm256_cvtepi16_epi32(coef_row);
@@ -557,25 +559,23 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
         // First level of combination
         lev2_low = _mm256_slli_epi64(lev2_low, 32);
         __m256i combined = _mm256_or_si256(lev2_low, lev2_high);
+        __m256i levx = _mm256_permutevar8x32_epi32(combined, shuffle0);
+
+        // Apply sign and clipping
+        levx = _mm256_sub_epi32(_mm256_xor_si256(levx, sign_mask_ext), sign_mask_ext);
+        levx = _mm256_max_epi32(levx, reg_minval_int16);
+        levx = _mm256_min_epi32(levx, reg_maxval_int16);
 
         // Second level of combination
-        __m256i levx = _mm256_permutevar8x32_epi32(combined, shuffle0);
-        __m128i levx_low = _mm256_castsi256_si128(levx);
-        __m256i levx_low_ext = _mm256_castsi128_si256(levx_low);
-        levx_low_ext = _mm256_shuffle_epi8(levx_low_ext, shuffle1);
+        __m256i levx_low_sh = _mm256_shuffle_epi8(levx, shuffle1);
         __m128i levx_high = _mm256_extracti128_si256(levx, 1);
         __m256i levx_high_ext = _mm256_castsi128_si256(levx_high);
-        levx_high_ext = _mm256_shuffle_epi8(levx_high_ext, shuffle2);
-        levx = _mm256_or_si256(levx_high_ext, levx_low_ext);
-
-        // Apply sign
-        levx = _mm256_sub_epi16(_mm256_xor_si256(levx, sign_mask), sign_mask);
+        __m256i levx_high_sh = _mm256_shuffle_epi8(levx_high_ext, shuffle2);
+        levx = _mm256_or_si256(levx_high_sh, levx_low_sh);
 
-        // Clip and store in coef
+        // store in coef
         __m128i lev4 = _mm256_castsi256_si128(levx);
-        __m128i lev5 = _mm_max_epi16(lev4, _mm_set1_epi16(-32768));
-        __m128i lev6 = _mm_min_epi16(lev5, _mm_set1_epi16(32767));
-        _mm_storeu_si128((__m128i*)(coef + i), lev6);
+        _mm_storeu_si128((__m128i*)(coef + i), lev4);
     }
     return OAPV_OK;
 }
 
@@ -1467,7 +1467,7 @@ int oapve_param_default(oapve_param_t *param)
     param->tile_h_mb = 16;
 
     param->profile_idc = OAPV_PROFILE_422_10;
-    param->level_idc = (int)(4.1 * 30);
+    param->level_idc = (int)((4.1 * 30.0) + 0.5);
     param->band_idc = 2;
 
     param->use_q_matrix = 0;
 
@@ -97,6 +97,11 @@ void oapv_trans(oapve_ctx_t *ctx, s16 *coef, int log2_w, int log2_h, int bit_dep
 
 static int oapv_quant(s16 *coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w, int log2_h, int bit_depth, int deadzone_offset)
 {
+    // coef is the output of the transform, the bit range is 16
+    // q_matrix has the value of q_scale * 16 / q_matrix, the bit range is 19
+    // (precision of q_scale is 15, and the range of q_matrix is 1~255)
+    // lev is the product of abs(coef) and q_matrix, the bit range is 35
+
     s64 lev;
     s32 offset;
     int sign;
 
@@ -5,18 +5,18 @@
 
 | No. | Bitstream Name | Description                                                  | Profile&nbsp;&nbsp; | Level | Band | Frame Rate | Resolution | # of Frame | MD5 sum of bitstream             |
 |-----|----------------|--------------------------------------------------------------|---------------------|-------|------|------------|------------|------------|----------------------------------|
-| 1   | tile_A         | one-tile per   one-picture                                   | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | b6e1cef839381b2c90cb9ffcdf537d77 |
-| 2   | tile_B         | Tile size = min size   tile (256x128)                        | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 9a0cb5126d705b03a2e7bcdcbacf6fbf |
+| 1   | tile_A         | one-tile per   one-picture                                   | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 74c5c0ca1bd2cfb28c6e2e0673e965f9 |
+| 2   | tile_B         | Tile size = min size   tile (256x128)                        | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 666ec80235a1e8f59db044d77a89a495 |
 | 3   | tile_C         | # of Tiles: max num   tile (20x20)                           | 422-10              | 5     | 0    | 30 fps     | 7680x4320  | 3          | 75363d036965a9dccc90a9ce8d0ae652 |
-| 4   | tile_D         | tile dummy data test                                         | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 0394e3ac275e2bc595c07c5290dc9466 |
-| 5   | tile_E         | tile_size_present_in_fh_flag=on                              | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | fdf72572b6551bc6a9eed7f80ca0ec0f |
+| 4   | tile_D         | tile dummy data test                                         | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | dd492519c90409a9ca5710746f45c125 |
+| 5   | tile_E         | tile_size_present_in_fh_flag=on                              | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 134c4aa46cec9ab0299824682a89eecd |
 | 6   | qp_A           | QP matrix enabled                                            | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 5ca6d4ea0f65add261b44ed3532a0a73 |
-| 7   | qp_B           | Tile QP   variation in a frame                               | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 1b24d4f97c18545b7881002cc642839b |
+| 7   | qp_B           | Tile QP   variation in a frame                               | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 85bfa477911447d994c17dea9703a9c7 |
 | 8   | qp_C           | Set all the QPs in a   frame equal to min. QP (=0)           | 422-10              | 6     | 2    | 60 fps     | 3840x2160  | 3          | 8c2928ec05eb06d42d6a8bda0ceb7e8d |
-| 9   | qp_D           | Set all the QPs in a   frame equal to max. QP (=51)          | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | e5acd3d3a0aa7bd6a45a49af35980512 |
-| 10  | qp_E           | Set different QP   betwee luma and chroma                    | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | e58ea5df35750c0d19cffefde12e78c4 |
-| 11  | syn_A          | Exercise a synthetic   image with QP = 0 and QP = 51         | 422-10              | 4.1   | 2    | 60 fps     | 1920x1080  | 2          | e1593a670c62d69718986ff84d1150f3 |
-| 12  | syn_B          | Exercise a synthetic   image with Tile QP variation in Frame | 422-10              | 4.1   | 2    | 60 fps     | 1920x1080  | 2          | 9f188e39824829aa05db584034ab1fd0 |
+| 9   | qp_D           | Set all the QPs in a   frame equal to max. QP (=51)          | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 9c98e376fb59100f5a5585482fb33746 |
+| 10  | qp_E           | Set different QP   between luma and chroma                   | 422-10              | 4.1   | 2    | 60 fps     | 3840x2160  | 3          | 6d1a1bc982d412758f353c8d041979d1 |
+| 11  | syn_A          | Exercise a synthetic   image with QP = 0 and QP = 51         | 422-10              | 4.1   | 2    | 60 fps     | 1920x1080  | 2          | db9f8f7ce57871481e5b257b79149b1e |
+| 12  | syn_B          | Exercise a synthetic   image with Tile QP variation in Frame | 422-10              | 4.1   | 2    | 60 fps     | 1920x1080  | 2          | 5f6c57f0bfe7ceb2f97a56a3bec7fb7a |
 
 ## Test sequence
 "sequence" folder has the uncompressed video sequence for encoder testing.