|
43 | 43 | _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr)) |
44 | 44 | #endif // !_mm256_loadu2_m128i |
45 | 45 |
|
46 | | -static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line) |
| 46 | +static void oapv_tx_part_avx(s16 *src, s16 *dst, int shift, int line) |
47 | 47 | { |
48 | 48 | __m256i v0, v1, v2, v3, v4, v5, v6, v7; |
49 | 49 | __m256i d0, d1, d2, d3; |
@@ -96,9 +96,9 @@ static void oapv_tx_pb8b_avx(s16 *src, s16 *dst, int shift, int line) |
96 | 96 | _mm_store_si128((__m128i *)(dst + 7 * line), _mm256_extracti128_si256(d1, 1)); |
97 | 97 | } |
98 | 98 |
|
99 | | -const oapv_fn_tx_t oapv_tbl_txb_avx[2] = |
| 99 | +const oapv_fn_tx_t oapv_tbl_fn_txb_avx[2] = |
100 | 100 | { |
101 | | - oapv_tx_pb8b_avx, |
| 101 | + oapv_tx_part_avx, |
102 | 102 | NULL |
103 | 103 | }; |
104 | 104 |
|
@@ -160,7 +160,7 @@ const oapv_fn_tx_t oapv_tbl_txb_avx[2] = |
160 | 160 | #define set_vals(a,b) b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a |
161 | 161 | #define set_vals1(a,b) b, a, b, a, b, a, b, a |
162 | 162 |
|
163 | | -static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line) |
| 163 | +static void oapv_itx_part_avx(s16* src, s16* dst, int shift, int line) |
164 | 164 | { |
165 | 165 | const __m256i coeff_p89_p75 = _mm256_setr_epi16(89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75, 89, 75); // 89 75 |
166 | 166 | const __m256i coeff_p50_p18 = _mm256_setr_epi16(50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18, 50, 18); // 50, 18 |
@@ -282,13 +282,27 @@ static void oapv_itx_pb8b_avx(s16* src, s16* dst, int shift, int line) |
282 | 282 | } |
283 | 283 | } |
284 | 284 |
|
| 285 | +const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2] = |
| 286 | +{ |
| 287 | + oapv_itx_part_avx, |
| 288 | + NULL |
| 289 | +}; |
| 290 | + |
| 291 | +static void oapv_itx_avx(s16* src, int shift1, int shift2, int line) |
| 292 | +{ |
| 293 | + // Runs oapv_itx_part_avx twice: src -> dst with shift1, then dst -> src with shift2, so the final output is written back into src. TODO: merge the two passes and optimize the AVX path further |
| 294 | + ALIGNED_16(s16 dst[OAPV_BLK_D]); |
| 295 | + oapv_itx_part_avx(src, dst, shift1, line); |
| 296 | + oapv_itx_part_avx(dst, src, shift2, line); |
| 297 | +} |
| 298 | + |
285 | 299 | const oapv_fn_itx_t oapv_tbl_fn_itx_avx[2] = |
286 | 300 | { |
287 | | - oapv_itx_pb8b_avx, |
| 301 | + oapv_itx_avx, |
288 | 302 | NULL |
289 | 303 | }; |
290 | 304 |
|
291 | | -static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int log2_w, int log2_h, |
| 305 | +static int oapv_quant_nnz_avx(s16 *coef, u8 qp, int q_matrix[OAPV_BLK_D], int log2_w, int log2_h, |
292 | 306 | u16 scale, int ch_type, int bit_depth, int deadzone_offset) |
293 | 307 | { |
294 | 308 | int nnz = 0; |
@@ -344,7 +358,7 @@ static int oapv_quant_nnz_avx(u8 qp, int q_matrix[OAPV_BLK_D], s16 *coef, int lo |
344 | 358 | return nnz; |
345 | 359 | } |
346 | 360 |
|
347 | | -const oapv_fn_quant_t oapv_tbl_quantb_avx[2] = |
| 361 | +const oapv_fn_quant_old_t oapv_tbl_quant_avx[2] = |
348 | 362 | { |
349 | 363 | oapv_quant_nnz_avx, |
350 | 364 | NULL |
@@ -409,7 +423,7 @@ static void oapv_dquant_avx(s16 *coef, int q_matrix[OAPV_BLK_D], int log2_w, int |
409 | 423 | } |
410 | 424 | } |
411 | 425 | } |
412 | | -const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2] = |
| 426 | +const oapv_fn_dquant_old_t oapv_tbl_fn_dquant_avx[2] = |
413 | 427 | { |
414 | 428 | oapv_dquant_avx, |
415 | 429 | NULL, |
|
0 commit comments