Skip to content

Commit 90b3683

Browse files
committed
ofh: fix int16 overflow
ofh: fix
1 parent eedc968 commit 90b3683

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

lib/ofh/compression/avx2_helpers.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,12 @@ using _mm256_comparator = __m256i (*)(__m256i a, __m256i b);
4343
/// absolute value of the two 16bit samples computed thus far and stores them in the output span.
4444
///
4545
/// \tparam COMPARE AVX2 intrinsic used for comparison, either \c _mm256_min_epi16 or \c _mm256_max_epi16.
46+
/// \tparam OFFSET  Offset subtracted (with unsigned saturation) from the resulting absolute values; zero disables the subtraction.
4647
/// \param[out] abs A span of two absolute values of max (or min) samples in each of the two input RBs.
4748
/// \param[in] v0_epi16 AVX2 register storing the first four IQ pairs of each of the two RBs.
4849
/// \param[in] v1_epi16 AVX2 register storing the second four IQ pairs of each of the two RBs.
4950
/// \param[in] v2_epi16 AVX2 register storing the last four IQ pairs of each of the two RBs.
50-
template <_mm256_comparator COMPARE>
51+
template <_mm256_comparator COMPARE, unsigned OFFSET>
5152
inline void find_rbs_abs_min_max_values(span<uint16_t> abs, __m256i v0_epi16, __m256i v1_epi16, __m256i v2_epi16)
5253
{
5354
// Find vertical min/max across every 3 16bit values in each resource block.
@@ -68,6 +69,10 @@ inline void find_rbs_abs_min_max_values(span<uint16_t> abs, __m256i v0_epi16, __
6869
__m256i cmp_rb0_rb1_epi16 = COMPARE(cmp_2val_epi16, cmp_2val_swp_epi16);
6970
__m256i cmp_rb0_rb1_epu16 = _mm256_abs_epi16(cmp_rb0_rb1_epi16);
7071

72+
if (OFFSET != 0) {
73+
cmp_rb0_rb1_epu16 = _mm256_subs_epu16(cmp_rb0_rb1_epu16, _mm256_set1_epi16(OFFSET));
74+
}
75+
7176
// Extract absolute minimum/maximum values for each RB.
7277
abs[0] = _mm256_extract_epi16(cmp_rb0_rb1_epu16, 0);
7378
abs[1] = _mm256_extract_epi16(cmp_rb0_rb1_epu16, 8);
@@ -101,11 +106,11 @@ inline void calculate_max_abs(span<unsigned> max_abs, __m256i rb0_epi16, __m256i
101106
__m256i v2_epi16 = _mm256_permute2f128_si256(rb0_epi16, rb1_epi16, 0x31);
102107
__m256i v1_epi16 = rb01_epi16;
103108

104-
find_rbs_abs_min_max_values<_mm256_min_epi16>(abs_min_values, v0_epi16, v1_epi16, v2_epi16);
105-
find_rbs_abs_min_max_values<_mm256_max_epi16>(abs_max_values, v0_epi16, v1_epi16, v2_epi16);
109+
find_rbs_abs_min_max_values<_mm256_min_epi16, 1>(abs_min_values, v0_epi16, v1_epi16, v2_epi16);
110+
find_rbs_abs_min_max_values<_mm256_max_epi16, 0>(abs_max_values, v0_epi16, v1_epi16, v2_epi16);
106111

107-
max_abs[0] = std::max<unsigned>(abs_max_values[0], abs_min_values[0] - 1U);
108-
max_abs[1] = std::max<unsigned>(abs_max_values[1], abs_min_values[1] - 1U);
112+
max_abs[0] = std::max<unsigned>(abs_max_values[0], abs_min_values[0]);
113+
max_abs[1] = std::max<unsigned>(abs_max_values[1], abs_min_values[1]);
109114
}
110115

111116
} // namespace mm256

0 commit comments

Comments
 (0)