AcademySoftwareFoundation
diff --git a/app/oapv_app_enc.c b/app/oapv_app_enc.c
Lines changed: 37 additions & 5 deletions
Lines changed: 37 additions & 5 deletions
diff --git a/src/avx/oapv_sad_avx.c b/src/avx/oapv_sad_avx.c
Lines changed: 2 additions & 2 deletions
Lines changed: 2 additions & 2 deletions
diff --git a/src/avx/oapv_sad_avx.h b/src/avx/oapv_sad_avx.h
Lines changed: 2 additions & 2 deletions
Lines changed: 2 additions & 2 deletions
diff --git a/src/avx/oapv_tq_avx.c b/src/avx/oapv_tq_avx.c
Lines changed: 22 additions & 8 deletions
Lines changed: 22 additions & 8 deletions
diff --git a/src/avx/oapv_tq_avx.h b/src/avx/oapv_tq_avx.h
Lines changed: 4 additions & 3 deletions
Lines changed: 4 additions & 3 deletions
diff --git a/src/neon/oapv_sad_neon.c b/src/neon/oapv_sad_neon.c
Lines changed: 3 additions & 3 deletions
Lines changed: 3 additions & 3 deletions
diff --git a/src/neon/oapv_sad_neon.h b/src/neon/oapv_sad_neon.h
Lines changed: 3 additions & 3 deletions
Lines changed: 3 additions & 3 deletions
@@ -165,6 +165,10 @@ static const args_opt_t enc_args_opts[] = {
         ARGS_NO_KEY,  "q-matrix-v", ARGS_VAL_TYPE_STRING, 0, NULL,
         "custom quantization matrix for V \"q1 q2 ... q63 q64\""
     },
+    {
+        ARGS_NO_KEY,  "q-matrix-x", ARGS_VAL_TYPE_STRING, 0, NULL,
+        "custom quantization matrix for X \"q1 q2 ... q63 q64\""
+    },
     {
         ARGS_NO_KEY,  "hash", ARGS_VAL_TYPE_NONE, 0, NULL,
         "embed frame hash value for conformance checking in decoding"
@@ -460,7 +464,7 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
         int   len_cnt = 0;
         while(len_cnt < len_y && cnt < OAPV_BLK_D) {
             sscanf(tmp, "%d", &param->q_matrix_y[cnt]);
-            if(param->q_matrix_y[cnt] < 1 || param->q_matrix_y[cnt] > 256) {
+            if(param->q_matrix_y[cnt] < 1 || param->q_matrix_y[cnt] > 255) {
                 logerr("input value of q_matrix_y is invalid\n");
                 return -1;
             }
@@ -482,7 +486,7 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
         int   len_cnt = 0;
         while(len_cnt < len_u && cnt < OAPV_BLK_D) {
             sscanf(tmp, "%d", &param->q_matrix_u[cnt]);
-            if(param->q_matrix_u[cnt] < 1 || param->q_matrix_u[cnt] > 256) {
+            if(param->q_matrix_u[cnt] < 1 || param->q_matrix_u[cnt] > 255) {
                 logerr("input value of q_matrix_u is invalid\n");
                 return -1;
             }
@@ -504,7 +508,7 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
         int   len_cnt = 0;
         while(len_cnt < len_v && cnt < OAPV_BLK_D) {
             sscanf(tmp, "%d", &param->q_matrix_v[cnt]);
-            if(param->q_matrix_v[cnt] < 1 || param->q_matrix_v[cnt] > 256) {
+            if(param->q_matrix_v[cnt] < 1 || param->q_matrix_v[cnt] > 255) {
                 logerr("input value of q_matrix_v is invalid\n");
                 return -1;
             }
@@ -518,6 +522,28 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
         }
     }
 
+    int len_x = (int)strlen(vars->q_matrix_x);
+    if (len_x > 0) {
+        param->use_q_matrix = 1;
+        char* tmp = vars->q_matrix_x;
+        int   cnt = 0;
+        int   len_cnt = 0;
+        while (len_cnt < len_x && cnt < OAPV_BLK_D) {
+            sscanf(tmp, "%d", &param->q_matrix_x[cnt]);
+            if (param->q_matrix_x[cnt] < 1 || param->q_matrix_x[cnt] > 255) {
+                logerr("input value of q_matrix_x is invalid\n");
+                return -1;
+            }
+            len_cnt += (int)log10(param->q_matrix_x[cnt]) + 2;
+            tmp = vars->q_matrix_x + len_cnt;
+            cnt++;
+        }
+        if (cnt < OAPV_BLK_D) {
+            logerr("input number of q_matrix_x is not enough\n");
+            return -1;
+        }
+    }
+
     if(param->use_q_matrix) {
         if(len_y == 0) {
             for(int i = 0; i < OAPV_BLK_D; i++) {
@@ -536,6 +562,12 @@ static int update_param(args_var_t *vars, oapve_param_t *param)
                 param->q_matrix_v[i] = 16;
             }
         }
+
+        if (len_x == 0) {
+            for (int i = 0; i < OAPV_BLK_D; i++) {
+                param->q_matrix_x[i] = 16;
+            }
+        }
     }
 
     param->csp = vars->input_csp;
@@ -607,8 +639,8 @@ int main(int argc, const char **argv)
     oapv_imgb_t   *imgb_w = NULL; // image buffer for write
     oapv_imgb_t   *imgb_i = NULL; // image buffer for input
     oapv_imgb_t   *imgb_o = NULL; // image buffer for output
-    oapv_frms_t    ifrms;         // frames for input
-    oapv_frms_t    rfrms;         // frames for reconstruction
+    oapv_frms_t    ifrms = { 0 }; // frames for input
+    oapv_frms_t    rfrms = { 0 }; // frames for reconstruction
     int            ret;
     oapv_clk_t     clk_beg, clk_end, clk_tot;
     oapv_mtime_t   au_cnt, au_skip;
 
@@ -32,7 +32,7 @@
 #include "oapv_sad_avx.h"
 
 #if X86_SSE
-const oapv_fn_sad_t oapv_tbl_sad_16b_avx[2] =
+const oapv_fn_sad_t oapv_tbl_fn_sad_16b_avx[2] =
 {
     oapv_sad_16b_sse_8x2n,
         NULL
@@ -58,7 +58,7 @@ static s64 ssd_16b_sse_8x8_avx(int w, int h, void* src1, void* src2, int s_src1,
     return t[0] + t[1] + t[2] + t[3];
 }
 
-const oapv_fn_ssd_t oapv_tbl_ssd_16b_avx[2] =
+const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_avx[2] =
 {
     ssd_16b_sse_8x8_avx,
         NULL
 
@@ -36,8 +36,8 @@
 #include <immintrin.h>
 
 #if X86_SSE
-extern const oapv_fn_sad_t oapv_tbl_sad_16b_avx[2];
-extern const oapv_fn_ssd_t oapv_tbl_ssd_16b_avx[2];
+extern const oapv_fn_sad_t oapv_tbl_fn_sad_16b_avx[2];
+extern const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_avx[2];
 #endif /* X86_SSE */
 
 #endif /* _OAPV_SAD_AVX_H_ */
 
@@ -43,7 +43,7 @@
     _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
 #endif // !_mm256_loadu2_m128i
 
-static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line)
+static void oapv_tx_part_avx(s16 *src, s16 *dst, int shift, int line)
 {
     __m256i v0, v1, v2, v3, v4, v5, v6, v7;
     __m256i d0, d1, d2, d3;
@@ -96,9 +96,9 @@ static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line)
     _mm_store_si128((__m128i *)(dst + 7 * line), _mm256_extracti128_si256(d1, 1));
 }
 
-const oapv_fn_tx_t oapv_tbl_txb_avx[2] =
+const oapv_fn_tx_t oapv_tbl_fn_txb_avx[2] =
 {
-    oapv_tx_pb8b_avx,
+    oapv_tx_part_avx,
         NULL
 };
 
@@ -160,7 +160,7 @@ const oapv_fn_tx_t oapv_tbl_txb_avx[2] =
 #define set_vals(a,b) b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a
 #define set_vals1(a,b) b, a, b, a, b, a, b, a
 
-static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line)
+static void oapv_itx_part_avx(s16* src, s16* dst, int shift, int line)
 {
     const __m256i coeff_p89_p75 = _mm256_setr_epi16(89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75); // 89 75
     const __m256i coeff_p50_p18 = _mm256_setr_epi16(50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18); // 50, 18
@@ -282,13 +282,27 @@ static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line)
     }
 }
 
+const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2] =
+{
+    oapv_itx_part_avx,
+        NULL
+};
+
+static void oapv_itx_avx(s16* src, int shift1, int shift2, int line)
+{
+    // To Do: Merge 2 passes and optimize AVX further
+    ALIGNED_16(s16 dst[OAPV_BLK_D]);
+    oapv_itx_part_avx(src, dst, shift1, line);
+    oapv_itx_part_avx(dst, src, shift2, line);
+}
+
 const oapv_fn_itx_t oapv_tbl_fn_itx_avx[2] =
 {
-    oapv_itx_pb8b_avx,
+    oapv_itx_avx,
         NULL
 };
 
-static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int log2_w, int log2_h,
+static int oapv_quant_nnz_avx(s16 *coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w, int log2_h,
                              u16 scale, int ch_type, int bit_depth, int deadzone_offset)
 {
     int nnz = 0;
@@ -344,7 +358,7 @@ static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int lo
     return nnz;
 }
 
-const oapv_fn_quant_t oapv_tbl_quantb_avx[2] =
+const oapv_fn_quant_old_t oapv_tbl_quant_avx[2] =
 {
     oapv_quant_nnz_avx,
         NULL
@@ -409,7 +423,7 @@ static void oapv_dquant_avx(s16 *coef, int q_matrix[OAPV_BLK_D], int log2_w, int
         }
     }
 }
-const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2] =
+const oapv_fn_dquant_old_t oapv_tbl_fn_dquant_avx[2] =
     {
         oapv_dquant_avx,
             NULL,
 
@@ -129,10 +129,11 @@
 
 
 #if X86_SSE
-extern const oapv_fn_tx_t oapv_tbl_txb_avx[2];
-extern const oapv_fn_quant_t oapv_tbl_quantb_avx[2];
+extern const oapv_fn_tx_t oapv_tbl_fn_txb_avx[2];
+extern const oapv_fn_quant_old_t oapv_tbl_fn_quant_avx[2];
+extern const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2];
 extern const oapv_fn_itx_t oapv_tbl_fn_itx_avx[2];
-extern const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2];
+extern const oapv_fn_dquant_old_t oapv_tbl_fn_dquant_avx[2];
 extern const oapv_fn_itx_adj_t oapv_tbl_fn_itx_adj_avx[2];
 #endif /* X86_SSE */
 
 
@@ -108,7 +108,7 @@ int sad_16b_neon_8x2n(int w, int h, void *src1, void *src2, int s_src1, int s_sr
     return (sad);
 }
 
-const oapv_fn_sad_t oapv_tbl_sad_16b_neon[2] =
+const oapv_fn_sad_t oapv_tbl_fn_sad_16b_neon[2] =
     {
         sad_16b_neon_8x2n,
             NULL
@@ -139,7 +139,7 @@ static void diff_16b_neon_8x8(int w, int h, void *src1, void *src2, int s_src1,
     SSE_DIFF_16B_8PEL(s1 + s_src1 * 6, s2 + s_src2 * 6, diff + s_diff * 6, m07, m08, m09);
     SSE_DIFF_16B_8PEL(s1 + s_src1 * 7, s2 + s_src2 * 7, diff + s_diff * 7, m10, m11, m12);
 }
-const oapv_fn_diff_t oapv_tbl_diff_16b_neon[2] =
+const oapv_fn_diff_t oapv_tbl_fn_diff_16b_neon[2] =
     {
         diff_16b_neon_8x8,
             NULL};
@@ -192,7 +192,7 @@ static s64 ssd_16b_neon_8x8(int w, int h, void *src1, void *src2, int s_src1, in
     return ssd;
 }
 
-const oapv_fn_ssd_t oapv_tbl_ssd_16b_neon[2] =
+const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_neon[2] =
     {
         ssd_16b_neon_8x8,
             NULL};
 
@@ -36,9 +36,9 @@
 #include "oapv_sad.h"
 
 #if ARM_NEON
-extern const oapv_fn_sad_t oapv_tbl_sad_16b_neon[2];
-extern const oapv_fn_ssd_t oapv_tbl_ssd_16b_neon[2];
-extern const oapv_fn_diff_t oapv_tbl_diff_16b_neon[2];
+extern const oapv_fn_sad_t oapv_tbl_fn_sad_16b_neon[2];
+extern const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_neon[2];
+extern const oapv_fn_diff_t oapv_tbl_fn_diff_16b_neon[2];
 
 int oapv_dc_removed_had8x8_neon(pel* org, int s_org);
 #endif /* ARM_NEON */
Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,7 @@`
`32`	`32`	`#include "oapv_sad_avx.h"`
`33`	`33`
`34`	`34`	`#if X86_SSE`
`35`		`-const oapv_fn_sad_t oapv_tbl_sad_16b_avx[2] =`
	`35`	`+const oapv_fn_sad_t oapv_tbl_fn_sad_16b_avx[2] =`
`36`	`36`	`{`
`37`	`37`	`oapv_sad_16b_sse_8x2n,`
`38`	`38`	`NULL`
`@@ -58,7 +58,7 @@ static s64 ssd_16b_sse_8x8_avx(int w, int h, void* src1, void* src2, int s_src1,`
`58`	`58`	`return t[0] + t[1] + t[2] + t[3];`
`59`	`59`	`}`
`60`	`60`
`61`		`-const oapv_fn_ssd_t oapv_tbl_ssd_16b_avx[2] =`
	`61`	`+const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_avx[2] =`
`62`	`62`	`{`
`63`	`63`	`ssd_16b_sse_8x8_avx,`
`64`	`64`	`NULL`
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@`
`43`	`43`	`_mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))`
`44`	`44`	`#endif // !_mm256_loadu2_m128i`
`45`	`45`
`46`		`-static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line)`
	`46`	`+static void oapv_tx_part_avx(s16 *src, s16 *dst, int shift, int line)`
`47`	`47`	`{`
`48`	`48`	`__m256i v0, v1, v2, v3, v4, v5, v6, v7;`
`49`	`49`	`__m256i d0, d1, d2, d3;`
`@@ -96,9 +96,9 @@ static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line)`
`96`	`96`	`_mm_store_si128((__m128i *)(dst + 7 * line), _mm256_extracti128_si256(d1, 1));`
`97`	`97`	`}`
`98`	`98`
`99`		`-const oapv_fn_tx_t oapv_tbl_txb_avx[2] =`
	`99`	`+const oapv_fn_tx_t oapv_tbl_fn_txb_avx[2] =`
`100`	`100`	`{`
`101`		`- oapv_tx_pb8b_avx,`
	`101`	`+ oapv_tx_part_avx,`
`102`	`102`	`NULL`
`103`	`103`	`};`
`104`	`104`
`@@ -160,7 +160,7 @@ const oapv_fn_tx_t oapv_tbl_txb_avx[2] =`
`160`	`160`	`#define set_vals(a,b) b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a`
`161`	`161`	`#define set_vals1(a,b) b, a, b, a, b, a, b, a`
`162`	`162`
`163`		`-static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line)`
	`163`	`+static void oapv_itx_part_avx(s16* src, s16* dst, int shift, int line)`
`164`	`164`	`{`
`165`	`165`	`const __m256i coeff_p89_p75 = _mm256_setr_epi16(89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75); // 89 75`
`166`	`166`	`const __m256i coeff_p50_p18 = _mm256_setr_epi16(50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18); // 50, 18`
`@@ -282,13 +282,27 @@ static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line)`
`282`	`282`	`}`
`283`	`283`	`}`
`284`	`284`
	`285`	`+const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2] =`
	`286`	`+{`
	`287`	`+ oapv_itx_part_avx,`
	`288`	`+ NULL`
	`289`	`+};`
	`290`	`+`
	`291`	`+static void oapv_itx_avx(s16* src, int shift1, int shift2, int line)`
	`292`	`+{`
	`293`	`+ // To Do: Merge 2 passes and optimize AVX further`
	`294`	`+ ALIGNED_16(s16 dst[OAPV_BLK_D]);`
	`295`	`+ oapv_itx_part_avx(src, dst, shift1, line);`
	`296`	`+ oapv_itx_part_avx(dst, src, shift2, line);`
	`297`	`+}`
	`298`	`+`
`285`	`299`	`const oapv_fn_itx_t oapv_tbl_fn_itx_avx[2] =`
`286`	`300`	`{`
`287`		`- oapv_itx_pb8b_avx,`
	`301`	`+ oapv_itx_avx,`
`288`	`302`	`NULL`
`289`	`303`	`};`
`290`	`304`
`291`		`-static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int log2_w, int log2_h,`
	`305`	`+static int oapv_quant_nnz_avx(s16 *coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w, int log2_h,`
`292`	`306`	`u16 scale, int ch_type, int bit_depth, int deadzone_offset)`
`293`	`307`	`{`
`294`	`308`	`int nnz = 0;`
`@@ -344,7 +358,7 @@ static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int lo`
`344`	`358`	`return nnz;`
`345`	`359`	`}`
`346`	`360`
`347`		`-const oapv_fn_quant_t oapv_tbl_quantb_avx[2] =`
	`361`	`+const oapv_fn_quant_old_t oapv_tbl_quant_avx[2] =`
`348`	`362`	`{`
`349`	`363`	`oapv_quant_nnz_avx,`
`350`	`364`	`NULL`
`@@ -409,7 +423,7 @@ static void oapv_dquant_avx(s16 *coef, int q_matrix[OAPV_BLK_D], int log2_w, int`
`409`	`423`	`}`
`410`	`424`	`}`
`411`	`425`	`}`
`412`		`-const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2] =`
	`426`	`+const oapv_fn_dquant_old_t oapv_tbl_fn_dquant_avx[2] =`
`413`	`427`	`{`
`414`	`428`	`oapv_dquant_avx,`
`415`	`429`	`NULL,`
Original file line number	Diff line number	Diff line change
`@@ -108,7 +108,7 @@ int sad_16b_neon_8x2n(int w, int h, void *src1, void *src2, int s_src1, int s_sr`
`108`	`108`	`return (sad);`
`109`	`109`	`}`
`110`	`110`
`111`		`-const oapv_fn_sad_t oapv_tbl_sad_16b_neon[2] =`
	`111`	`+const oapv_fn_sad_t oapv_tbl_fn_sad_16b_neon[2] =`
`112`	`112`	`{`
`113`	`113`	`sad_16b_neon_8x2n,`
`114`	`114`	`NULL`
`@@ -139,7 +139,7 @@ static void diff_16b_neon_8x8(int w, int h, void *src1, void *src2, int s_src1,`
`139`	`139`	`SSE_DIFF_16B_8PEL(s1 + s_src1 * 6, s2 + s_src2 * 6, diff + s_diff * 6, m07, m08, m09);`
`140`	`140`	`SSE_DIFF_16B_8PEL(s1 + s_src1 * 7, s2 + s_src2 * 7, diff + s_diff * 7, m10, m11, m12);`
`141`	`141`	`}`
`142`		`-const oapv_fn_diff_t oapv_tbl_diff_16b_neon[2] =`
	`142`	`+const oapv_fn_diff_t oapv_tbl_fn_diff_16b_neon[2] =`
`143`	`143`	`{`
`144`	`144`	`diff_16b_neon_8x8,`
`145`	`145`	`NULL};`
`@@ -192,7 +192,7 @@ static s64 ssd_16b_neon_8x8(int w, int h, void *src1, void *src2, int s_src1, in`
`192`	`192`	`return ssd;`
`193`	`193`	`}`
`194`	`194`
`195`		`-const oapv_fn_ssd_t oapv_tbl_ssd_16b_neon[2] =`
	`195`	`+const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b_neon[2] =`
`196`	`196`	`{`
`197`	`197`	`ssd_16b_neon_8x8,`
`198`	`198`	`NULL};`