Skip to content

Commit f6edb2a

Browse files
authored
fixed level_idc and AVX SIMD, added APV pattern file for ImHex (#49)
* fixed level_idc and AVX SIMD Signed-off-by: [email protected] <[email protected]> * recover miss-information in README.md --------- Signed-off-by: [email protected] <[email protected]>
1 parent c89c53d commit f6edb2a

File tree

19 files changed

+271
-31
lines changed

19 files changed

+271
-31
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,18 @@ Decoding:
9797

9898
oapv_app_dec -i encoded.apv -o output.y4m
9999

100+
## Utility
101+
102+
### Graphical APV bitstream parser
103+
104+
A pattern file for parsing APV bitstreams with [ImHex](https://github.com/WerWolv/ImHex) is provided [here](/util/apv.hexpat).
105+
1. Install [ImHex](https://github.com/WerWolv/ImHex) application
106+
2. Download [APV pattern file](/util/apv.hexpat)
107+
3. Open an APV bitstream (\*.apv file) with ImHex
108+
4. Import the APV pattern file in the Pattern editor view of ImHex and apply it
109+
110+
![APV_on_ImHex](/readme/img/apv_parser_on_imhex.png)
111+
100112
## Testing
101113

102114
In build directory run ``ctest``

app/oapv_app_enc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -499,9 +499,9 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
499499
param->csp = vars->input_csp;
500500

501501
/* update level idc */
502-
double tmp_level = 0;
503-
sscanf(vars->level, "%lf", &tmp_level);
504-
param->level_idc = tmp_level * 30;
502+
float tmp_level = 0;
503+
sscanf(vars->level, "%f", &tmp_level);
504+
param->level_idc = (int)((tmp_level * 30.0) + 0.5);
505505
/* update band idc */
506506
param->band_idc = vars->band;
507507

readme/empty.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

readme/img/apv_parser_on_imhex.png

164 KB
Loading

src/avx/oapv_tq_avx.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,8 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
519519
shift = QUANT_SHIFT + tr_shift + (qp / 6);
520520
offset = (s64)deadzone_offset << (shift - 9);
521521
__m256i offset_vector = _mm256_set1_epi64x(offset);
522+
__m256i reg_minval_int16 = _mm256_set1_epi32(-32768);
523+
__m256i reg_maxval_int16 = _mm256_set1_epi32(32767);
522524

523525
int pixels = (1 << (log2_w + log2_h));
524526
int i;
@@ -533,16 +535,16 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
533535
0, 1, 4, 5, 8, 9, 12, 13,
534536
-128, -128, -128, -128, -128, -128, -128, -128,
535537
-128, -128, -128, -128, -128, -128, -128, -128);
536-
538+
537539
for (i = 0; i < pixels; i += 8)
538540
{
539541
// Load first row
540542
__m256i quant_matrix = _mm256_lddqu_si256((__m256i*)(q_matrix + i));
541543
__m128i coef_row = _mm_lddqu_si128((__m128i*)(coef + i));
542544

543545
// Extract sign
544-
__m256i coef_row_cast = _mm256_castsi128_si256(coef_row);
545-
__m256i sign_mask = _mm256_srai_epi16(coef_row_cast, 15);
546+
__m128i sign_mask = _mm_srai_epi16(coef_row, 15);
547+
__m256i sign_mask_ext = _mm256_cvtepi16_epi32(sign_mask);
546548

547549
// Convert to 32 bits and take abs()
548550
__m256i coef_row_ext = _mm256_cvtepi16_epi32(coef_row);
@@ -557,25 +559,23 @@ static int oapv_quant_avx(s16* coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w
557559
// First level of combination
558560
lev2_low = _mm256_slli_epi64(lev2_low, 32);
559561
__m256i combined = _mm256_or_si256(lev2_low, lev2_high);
562+
__m256i levx = _mm256_permutevar8x32_epi32(combined, shuffle0);
563+
564+
// Apply sign and clipping
565+
levx = _mm256_sub_epi32(_mm256_xor_si256(levx, sign_mask_ext), sign_mask_ext);
566+
levx = _mm256_max_epi32(levx, reg_minval_int16);
567+
levx = _mm256_min_epi32(levx, reg_maxval_int16);
560568

561569
// Second level of combination
562-
__m256i levx = _mm256_permutevar8x32_epi32(combined, shuffle0);
563-
__m128i levx_low = _mm256_castsi256_si128(levx);
564-
__m256i levx_low_ext = _mm256_castsi128_si256(levx_low);
565-
levx_low_ext = _mm256_shuffle_epi8(levx_low_ext, shuffle1);
570+
__m256i levx_low_sh = _mm256_shuffle_epi8(levx, shuffle1);
566571
__m128i levx_high = _mm256_extracti128_si256(levx, 1);
567572
__m256i levx_high_ext = _mm256_castsi128_si256(levx_high);
568-
levx_high_ext = _mm256_shuffle_epi8(levx_high_ext, shuffle2);
569-
levx = _mm256_or_si256(levx_high_ext, levx_low_ext);
570-
571-
// Apply sign
572-
levx = _mm256_sub_epi16(_mm256_xor_si256(levx, sign_mask), sign_mask);
573+
__m256i levx_high_sh = _mm256_shuffle_epi8(levx_high_ext, shuffle2);
574+
levx = _mm256_or_si256(levx_high_sh, levx_low_sh);
573575

574-
// Clip and store in coef
576+
// store in coef
575577
__m128i lev4 = _mm256_castsi256_si128(levx);
576-
__m128i lev5 = _mm_max_epi16(lev4, _mm_set1_epi16(-32768));
577-
__m128i lev6 = _mm_min_epi16(lev5, _mm_set1_epi16(32767));
578-
_mm_storeu_si128((__m128i*)(coef + i), lev6);
578+
_mm_storeu_si128((__m128i*)(coef + i), lev4);
579579
}
580580
return OAPV_OK;
581581
}

src/oapv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1467,7 +1467,7 @@ int oapve_param_default(oapve_param_t *param)
14671467
param->tile_h_mb = 16;
14681468

14691469
param->profile_idc = OAPV_PROFILE_422_10;
1470-
param->level_idc = (int)(4.1 * 30);
1470+
param->level_idc = (int)((4.1 * 30.0) + 0.5);
14711471
param->band_idc = 2;
14721472

14731473
param->use_q_matrix = 0;

src/oapv_tq.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ void oapv_trans(oapve_ctx_t *ctx, s16 *coef, int log2_w, int log2_h, int bit_dep
9797

9898
static int oapv_quant(s16 *coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w, int log2_h, int bit_depth, int deadzone_offset)
9999
{
100+
// coef is the output of the transform, the bit range is 16
101+
// q_matrix has the value of q_scale * 16 / q_matrix, the bit range is 19
102+
// (precision of q_scale is 15, and the range of q_matrix is 1~255)
103+
// lev is the product of abs(coef) and q_matrix, the bit range is 35
104+
100105
s64 lev;
101106
s32 offset;
102107
int sign;

test/README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@
55

66
| No. | Bitstream Name | Description | Profile&nbsp;&nbsp; | Level | Band | Frame Rate | Resolution | # of Frame | MD5 sum of bitstream |
77
|-----|----------------|--------------------------------------------------------------|---------------------|-------|------|------------|------------|------------|----------------------------------|
8-
| 1 | tile_A | one-tile per one-picture | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | b6e1cef839381b2c90cb9ffcdf537d77 |
9-
| 2 | tile_B | Tile size = min size tile (256x128) | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 9a0cb5126d705b03a2e7bcdcbacf6fbf |
8+
| 1 | tile_A | one-tile per one-picture | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 74c5c0ca1bd2cfb28c6e2e0673e965f9 |
9+
| 2 | tile_B | Tile size = min size tile (256x128) | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 666ec80235a1e8f59db044d77a89a495 |
1010
| 3 | tile_C | # of Tiles: max num tile (20x20) | 422-10 | 5 | 0 | 30 fps | 7680x4320 | 3 | 75363d036965a9dccc90a9ce8d0ae652 |
11-
| 4 | tile_D | tile dummy data test | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 0394e3ac275e2bc595c07c5290dc9466 |
12-
| 5 | tile_E | tile_size_present_in_fh_flag=on | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | fdf72572b6551bc6a9eed7f80ca0ec0f |
11+
| 4 | tile_D | tile dummy data test | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | dd492519c90409a9ca5710746f45c125 |
12+
| 5 | tile_E | tile_size_present_in_fh_flag=on | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 134c4aa46cec9ab0299824682a89eecd |
1313
| 6 | qp_A | QP matrix enabled | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 5ca6d4ea0f65add261b44ed3532a0a73 |
14-
| 7 | qp_B | Tile QP variation in a frame | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 1b24d4f97c18545b7881002cc642839b |
14+
| 7 | qp_B | Tile QP variation in a frame | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 85bfa477911447d994c17dea9703a9c7 |
1515
| 8 | qp_C | Set all the QPs in a frame equal to min. QP (=0) | 422-10 | 6 | 2 | 60 fps | 3840x2160 | 3 | 8c2928ec05eb06d42d6a8bda0ceb7e8d |
16-
| 9 | qp_D | Set all the QPs in a frame equal to max. QP (=51) | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | e5acd3d3a0aa7bd6a45a49af35980512 |
17-
| 10 | qp_E | Set different QP between luma and chroma | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | e58ea5df35750c0d19cffefde12e78c4 |
18-
| 11 | syn_A | Exercise a synthetic image with QP = 0 and QP = 51 | 422-10 | 4.1 | 2 | 60 fps | 1920x1080 | 2 | e1593a670c62d69718986ff84d1150f3 |
19-
| 12 | syn_B | Exercise a synthetic image with Tile QP variation in Frame | 422-10 | 4.1 | 2 | 60 fps | 1920x1080 | 2 | 9f188e39824829aa05db584034ab1fd0 |
16+
| 9 | qp_D | Set all the QPs in a frame equal to max. QP (=51) | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 9c98e376fb59100f5a5585482fb33746 |
17+
| 10 | qp_E | Set different QP between luma and chroma | 422-10 | 4.1 | 2 | 60 fps | 3840x2160 | 3 | 6d1a1bc982d412758f353c8d041979d1 |
18+
| 11 | syn_A | Exercise a synthetic image with QP = 0 and QP = 51 | 422-10 | 4.1 | 2 | 60 fps | 1920x1080 | 2 | db9f8f7ce57871481e5b257b79149b1e |
19+
| 12 | syn_B | Exercise a synthetic image with Tile QP variation in Frame | 422-10 | 4.1 | 2 | 60 fps | 1920x1080 | 2 | 5f6c57f0bfe7ceb2f97a56a3bec7fb7a |
2020

2121
## Test sequence
2222
"sequence" folder has the uncompressed video sequence for encoder testing.

test/bitstream/qp_B.apv

0 Bytes
Binary file not shown.

test/bitstream/qp_D.apv

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)