@@ -43,11 +43,12 @@ using _mm256_comparator = __m256i (*)(__m256i a, __m256i b);
4343// / absolute value of the two 16bit samples computed thus far and stores them in the output span.
4444// /
4545// / \tparam COMPARE AVX2 intrinsic used for comparison, either \c _mm256_min_epi16 or \c _mm256_max_epi16.
46+ // / \tparam OFFSET Offset subtracted, with unsigned saturation, from the resultant absolute values; no subtraction is performed when it is 0.
4647// / \param[out] abs A span of two absolute values of max (or min) samples in each of the two input RBs.
4748// / \param[in] v0_epi16 AVX2 register storing first four IQ pairs of each of the two RBs.
4849// / \param[in] v1_epi16 AVX2 register storing second four IQ pairs of each of the two RBs.
4950// / \param[in] v2_epi16 AVX2 register storing last four IQ pairs of each of the two RBs.
50- template <_mm256_comparator COMPARE>
51+ template <_mm256_comparator COMPARE, unsigned OFFSET >
5152inline void find_rbs_abs_min_max_values (span<uint16_t > abs, __m256i v0_epi16, __m256i v1_epi16, __m256i v2_epi16)
5253{
5354 // Find vertical min/max across every 3 16bit values in each resource block.
@@ -68,6 +69,10 @@ inline void find_rbs_abs_min_max_values(span<uint16_t> abs, __m256i v0_epi16, __
6869 __m256i cmp_rb0_rb1_epi16 = COMPARE (cmp_2val_epi16, cmp_2val_swp_epi16);
6970 __m256i cmp_rb0_rb1_epu16 = _mm256_abs_epi16 (cmp_rb0_rb1_epi16);
7071
72+ if (OFFSET != 0 ) {
73+ cmp_rb0_rb1_epu16 = _mm256_subs_epu16 (cmp_rb0_rb1_epu16, _mm256_set1_epi16 (OFFSET));
74+ }
75+
7176 // Extract absolute minimum/maximum values for each RB.
7277 abs[0 ] = _mm256_extract_epi16 (cmp_rb0_rb1_epu16, 0 );
7378 abs[1 ] = _mm256_extract_epi16 (cmp_rb0_rb1_epu16, 8 );
@@ -101,11 +106,11 @@ inline void calculate_max_abs(span<unsigned> max_abs, __m256i rb0_epi16, __m256i
101106 __m256i v2_epi16 = _mm256_permute2f128_si256 (rb0_epi16, rb1_epi16, 0x31 );
102107 __m256i v1_epi16 = rb01_epi16;
103108
104- find_rbs_abs_min_max_values<_mm256_min_epi16>(abs_min_values, v0_epi16, v1_epi16, v2_epi16);
105- find_rbs_abs_min_max_values<_mm256_max_epi16>(abs_max_values, v0_epi16, v1_epi16, v2_epi16);
109+ find_rbs_abs_min_max_values<_mm256_min_epi16, 1 >(abs_min_values, v0_epi16, v1_epi16, v2_epi16);
110+ find_rbs_abs_min_max_values<_mm256_max_epi16, 0 >(abs_max_values, v0_epi16, v1_epi16, v2_epi16);
106111
107- max_abs[0 ] = std::max<unsigned >(abs_max_values[0 ], abs_min_values[0 ] - 1U );
108- max_abs[1 ] = std::max<unsigned >(abs_max_values[1 ], abs_min_values[1 ] - 1U );
112+ max_abs[0 ] = std::max<unsigned >(abs_max_values[0 ], abs_min_values[0 ]);
113+ max_abs[1 ] = std::max<unsigned >(abs_max_values[1 ], abs_min_values[1 ]);
109114}
110115
111116} // namespace mm256
0 commit comments