@@ -112,6 +112,9 @@ namespace ojph {
112112 (dst_line->flags & line_buf::LFT_32BIT) &&
113113 (dst_line->flags & line_buf::LFT_INTEGER));
114114
115+ uint32_t rounding_mode = _MM_GET_ROUNDING_MODE ();
116+ _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);
117+
115118 const float * sp = src_line->f32 ;
116119 si32* dp = dst_line->i32 + dst_line_offset;
117120 if (bit_depth <= 30 )
@@ -131,7 +134,6 @@ namespace ojph {
131134 {
132135 __m128 t = _mm_loadu_ps (sp);
133136 t = _mm_mul_ps (t, mul);
134- t = _mm_round_ps (t, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
135137 __m128i u = _mm_cvtps_epi32 (t);
136138 u = ojph_mm_max_epi32 (u, lower_limit);
137139 u = ojph_mm_min_epi32 (u, upper_limit);
@@ -150,7 +152,6 @@ namespace ojph {
150152 for (ui32 i = width; i > 0 ; i -= 4 , sp += 4 , dp += 4 ) {
151153 __m128 t = _mm_loadu_ps (sp);
152154 t = _mm_mul_ps (t, mul);
153- t = _mm_round_ps (t, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
154155 __m128i u = _mm_cvtps_epi32 (t);
155156 u = ojph_mm_max_epi32 (u, lower_limit);
156157 u = ojph_mm_min_epi32 (u, upper_limit);
@@ -199,6 +200,8 @@ namespace ojph {
199200 }
200201 }
201202 }
203+
204+ _MM_SET_ROUNDING_MODE (rounding_mode);
202205 }
203206
204207 // ///////////////////////////////////////////////////////////////////////
0 commit comments