@@ -63,21 +63,16 @@ namespace ojph {
6363 line_buf *dst_line, const ui32 dst_line_offset,
6464 si64 shift, ui32 width) = NULL ;
6565
66- // ////////////////////////////////////////////////////////////////////////
67- void (*cnvrt_si32_to_float_shftd)
68- (const si32 *sp, float *dp, float mul, ui32 width) = NULL ;
69-
70- // ////////////////////////////////////////////////////////////////////////
71- void (*cnvrt_si32_to_float)
72- (const si32 *sp, float *dp, float mul, ui32 width) = NULL ;
7366
7467 // ////////////////////////////////////////////////////////////////////////
75- void (*cnvrt_float_to_si32_shftd)
76- (const float *sp, si32 *dp, float mul, ui32 width) = NULL ;
// Dispatch pointer for the float-to-integer line conversion (irv_ variant,
// without the NLT type 3 remapping). It is defined as NULL here; the
// initialization code visible in this change assigns it the gen_, sse2_,
// avx2_ or wasm_ implementation.
68+ void (*irv_convert_to_integer) (
69+ const line_buf *src_line, line_buf *dst_line, ui32 dst_line_offset,
70+ ui32 bit_depth, bool is_signed, ui32 width) = NULL ;
7771
7872 // ////////////////////////////////////////////////////////////////////////
79- void (*cnvrt_float_to_si32)
80- (const float *sp, si32 *dp, float mul, ui32 width) = NULL ;
// Dispatch pointer for the integer-to-float line conversion (irv_ variant,
// without the NLT type 3 remapping). It is defined as NULL here; the
// initialization code visible in this change assigns it the gen_, sse2_,
// avx2_ or wasm_ implementation.
73+ void (*irv_convert_to_float) (
74+ const line_buf *src_line, ui32 src_line_offset,
75+ line_buf *dst_line, ui32 bit_depth, bool is_signed, ui32 width) = NULL ;
8176
8277 // ////////////////////////////////////////////////////////////////////////
8378 void (*irv_convert_to_integer_nlt_type3) (
@@ -122,12 +117,10 @@ namespace ojph {
122117
123118 rev_convert = gen_rev_convert;
124119 rev_convert_nlt_type3 = gen_rev_convert_nlt_type3;
125- cnvrt_si32_to_float_shftd = gen_cnvrt_si32_to_float_shftd;
126- cnvrt_si32_to_float = gen_cnvrt_si32_to_float;
127- cnvrt_float_to_si32_shftd = gen_cnvrt_float_to_si32_shftd;
128- cnvrt_float_to_si32 = gen_cnvrt_float_to_si32;
129- irv_convert_to_float_nlt_type3 = gen_irv_convert_to_float_nlt_type3;
120+ irv_convert_to_integer = gen_irv_convert_to_integer;
121+ irv_convert_to_float = gen_irv_convert_to_float;
130122 irv_convert_to_integer_nlt_type3 = gen_irv_convert_to_integer_nlt_type3;
123+ irv_convert_to_float_nlt_type3 = gen_irv_convert_to_float_nlt_type3;
131124 rct_forward = gen_rct_forward;
132125 rct_backward = gen_rct_backward;
133126 ict_forward = gen_ict_forward;
@@ -140,10 +133,6 @@ namespace ojph {
140133 #ifndef OJPH_DISABLE_SSE
141134 if (get_cpu_ext_level () >= X86_CPU_EXT_LEVEL_SSE)
142135 {
143- cnvrt_si32_to_float_shftd = sse_cnvrt_si32_to_float_shftd;
144- cnvrt_si32_to_float = sse_cnvrt_si32_to_float;
145- cnvrt_float_to_si32_shftd = sse_cnvrt_float_to_si32_shftd;
146- cnvrt_float_to_si32 = sse_cnvrt_float_to_si32;
147136 ict_forward = sse_ict_forward;
148137 ict_backward = sse_ict_backward;
149138 }
@@ -154,8 +143,8 @@ namespace ojph {
154143 {
155144 rev_convert = sse2_rev_convert;
156145 rev_convert_nlt_type3 = sse2_rev_convert_nlt_type3;
157- cnvrt_float_to_si32_shftd = sse2_cnvrt_float_to_si32_shftd ;
158- cnvrt_float_to_si32 = sse2_cnvrt_float_to_si32 ;
146+ irv_convert_to_integer = sse2_irv_convert_to_integer ;
147+ irv_convert_to_float = sse2_irv_convert_to_float ;
159148 irv_convert_to_integer_nlt_type3 =
160149 sse2_irv_convert_to_integer_nlt_type3;
161150 irv_convert_to_float_nlt_type3 =
@@ -168,10 +157,6 @@ namespace ojph {
168157 #ifndef OJPH_DISABLE_AVX
169158 if (get_cpu_ext_level () >= X86_CPU_EXT_LEVEL_AVX)
170159 {
171- cnvrt_si32_to_float_shftd = avx_cnvrt_si32_to_float_shftd;
172- cnvrt_si32_to_float = avx_cnvrt_si32_to_float;
173- cnvrt_float_to_si32_shftd = avx_cnvrt_float_to_si32_shftd;
174- cnvrt_float_to_si32 = avx_cnvrt_float_to_si32;
175160 ict_forward = avx_ict_forward;
176161 ict_backward = avx_ict_backward;
177162 }
@@ -182,6 +167,8 @@ namespace ojph {
182167 {
183168 rev_convert = avx2_rev_convert;
184169 rev_convert_nlt_type3 = avx2_rev_convert_nlt_type3;
170+ irv_convert_to_integer = avx2_irv_convert_to_integer;
171+ irv_convert_to_float = avx2_irv_convert_to_float;
185172 irv_convert_to_integer_nlt_type3 =
186173 avx2_irv_convert_to_integer_nlt_type3;
187174 irv_convert_to_float_nlt_type3 =
@@ -201,10 +188,8 @@ namespace ojph {
201188
202189 rev_convert = wasm_rev_convert;
203190 rev_convert_nlt_type3 = wasm_rev_convert_nlt_type3;
204- cnvrt_si32_to_float_shftd = wasm_cnvrt_si32_to_float_shftd;
205- cnvrt_si32_to_float = wasm_cnvrt_si32_to_float;
206- cnvrt_float_to_si32_shftd = wasm_cnvrt_float_to_si32_shftd;
207- cnvrt_float_to_si32 = wasm_cnvrt_float_to_si32;
191+ irv_convert_to_integer = wasm_irv_convert_to_integer;
192+ irv_convert_to_float = wasm_irv_convert_to_float;
208193 irv_convert_to_integer_nlt_type3 = wasm_irv_convert_to_integer_nlt_type3;
209194 irv_convert_to_float_nlt_type3 = wasm_irv_convert_to_float_nlt_type3;
210195 rct_forward = wasm_rct_forward;
@@ -310,40 +295,11 @@ namespace ojph {
310295 }
311296 }
312297
313- // ////////////////////////////////////////////////////////////////////////
314- void gen_cnvrt_si32_to_float_shftd (const si32 *sp, float *dp, float mul,
315- ui32 width)
316- {
317- for (ui32 i = width; i > 0 ; --i)
318- *dp++ = (float )(ui32)*sp++ * mul - 0 .5f ;
319- }
320-
321- // ////////////////////////////////////////////////////////////////////////
322- void gen_cnvrt_si32_to_float (const si32 *sp, float *dp, float mul,
323- ui32 width)
324- {
325- for (ui32 i = width; i > 0 ; --i)
326- *dp++ = (float )*sp++ * mul;
327- }
328-
329- // ////////////////////////////////////////////////////////////////////////
330- void gen_cnvrt_float_to_si32_shftd (const float *sp, si32 *dp, float mul,
331- ui32 width)
332- {
333- for (ui32 i = width; i > 0 ; --i)
334- *dp++ = (si32)ojph_round ((*sp++ + 0 .5f ) * mul);
335- }
336-
337- // ////////////////////////////////////////////////////////////////////////
338- void gen_cnvrt_float_to_si32 (const float *sp, si32 *dp, float mul,
339- ui32 width)
340- {
341- for (ui32 i = width; i > 0 ; --i)
342- *dp++ = ojph_round (*sp++ * mul);
343- }
344298
345299 // ////////////////////////////////////////////////////////////////////////
346- void gen_irv_convert_to_integer_nlt_type3 (const line_buf *src_line,
300+ template <bool NLT_TYPE3>
301+ static inline
302+ void local_gen_irv_convert_to_integer (const line_buf *src_line,
347303 line_buf *dst_line, ui32 dst_line_offset,
348304 ui32 bit_depth, bool is_signed, ui32 width)
349305 {
@@ -371,19 +327,20 @@ namespace ojph {
371327 if (is_signed)
372328 {
373329 const si32 bias = (1 << (bit_depth - 1 )) + 1 ;
374- for (ui32 i = width; i > 0 ; --i) {
330+ for (int i = width; i > 0 ; --i) {
375331 float t = *sp++ * mul;
376332 si32 v = ojph_round (t);
377333 v = t >= fl_low_lim ? v : s32_low_lim;
378334 v = t < fl_up_lim ? v : s32_up_lim;
379- v = (v >= 0 ) ? v : (- v - bias);
335+ if (NLT_TYPE3)
336+ v = (v >= 0 ) ? v : (- v - bias);
380337 *dp++ = v;
381338 }
382339 }
383340 else
384341 {
385- const si32 half = ( 1 << (bit_depth - 1 ) );
386- for (ui32 i = width; i > 0 ; --i) {
342+ const si32 half = 1 << (bit_depth - 1 );
343+ for (int i = width; i > 0 ; --i) {
387344 float t = *sp++ * mul;
388345 si32 v = ojph_round (t);
389346 v = t >= fl_low_lim ? v : s32_low_lim;
@@ -394,7 +351,27 @@ namespace ojph {
394351 }
395352
396353 // ////////////////////////////////////////////////////////////////////////
397- void gen_irv_convert_to_float_nlt_type3 (const line_buf *src_line,
// Generic (non-SIMD) float-to-integer conversion of one line.
// Thin wrapper: forwards every argument unchanged to the shared template
// local_gen_irv_convert_to_integer with NLT_TYPE3 = false, so the
// `v = (v >= 0) ? v : (- v - bias)` remapping in the signed branch of the
// template is not applied.
354+ void gen_irv_convert_to_integer (const line_buf *src_line,
355+ line_buf *dst_line, ui32 dst_line_offset,
356+ ui32 bit_depth, bool is_signed, ui32 width)
357+ {
358+ local_gen_irv_convert_to_integer<false >(src_line, dst_line,
359+ dst_line_offset, bit_depth, is_signed, width);
360+ }
361+
362+ // ////////////////////////////////////////////////////////////////////////
// Generic (non-SIMD) float-to-integer conversion of one line, NLT type 3
// flavour. Thin wrapper: forwards every argument unchanged to the shared
// template local_gen_irv_convert_to_integer with NLT_TYPE3 = true, which
// enables the `v = (v >= 0) ? v : (- v - bias)` remapping of negative
// values in the signed branch of the template.
363+ void gen_irv_convert_to_integer_nlt_type3 (const line_buf *src_line,
364+ line_buf *dst_line, ui32 dst_line_offset,
365+ ui32 bit_depth, bool is_signed, ui32 width)
366+ {
367+ local_gen_irv_convert_to_integer<true >(src_line, dst_line,
368+ dst_line_offset, bit_depth, is_signed, width);
369+ }
370+
371+ // ////////////////////////////////////////////////////////////////////////
372+ template <bool NLT_TYPE3>
373+ static inline
374+ void local_gen_irv_convert_to_float (const line_buf *src_line,
398375 ui32 src_line_offset, line_buf *dst_line,
399376 ui32 bit_depth, bool is_signed, ui32 width)
400377 {
@@ -411,16 +388,17 @@ namespace ojph {
411388 if (is_signed)
412389 {
413390 si32 bias = (si32)((ui32)INT_MIN + 1 );
414- for (ui32 i = width; i > 0 ; --i) {
391+ for (int i = width; i > 0 ; --i) {
415392 si32 v = *sp++ << shift;
416- v = (v >= 0 ) ? v : (- v - bias);
393+ if (NLT_TYPE3)
394+ v = (v >= 0 ) ? v : (- v - bias);
417395 *dp++ = (float )v * mul;
418396 }
419397 }
420398 else
421399 {
422400 const ui32 half = (ui32)INT_MIN;
423- for (ui32 i = width; i > 0 ; --i) {
401+ for (int i = width; i > 0 ; --i) {
424402 ui32 v = (ui32)*sp++;
425403 v <<= shift;
426404 v -= half;
@@ -429,6 +407,24 @@ namespace ojph {
429407 }
430408 }
431409
410+ // ////////////////////////////////////////////////////////////////////////
// Generic (non-SIMD) integer-to-float conversion of one line.
// Thin wrapper: forwards every argument unchanged to the shared template
// local_gen_irv_convert_to_float with NLT_TYPE3 = false, so the
// `v = (v >= 0) ? v : (- v - bias)` remapping in the signed branch of the
// template is not applied before scaling to float.
411+ void gen_irv_convert_to_float (const line_buf *src_line,
412+ ui32 src_line_offset, line_buf *dst_line,
413+ ui32 bit_depth, bool is_signed, ui32 width)
414+ {
415+ local_gen_irv_convert_to_float<false >(src_line, src_line_offset,
416+ dst_line, bit_depth, is_signed, width);
417+ }
418+
419+ // ////////////////////////////////////////////////////////////////////////
// Generic (non-SIMD) integer-to-float conversion of one line, NLT type 3
// flavour. Thin wrapper: forwards every argument unchanged to the shared
// template local_gen_irv_convert_to_float with NLT_TYPE3 = true, which
// enables the `v = (v >= 0) ? v : (- v - bias)` remapping of negative
// values in the signed branch before scaling to float.
420+ void gen_irv_convert_to_float_nlt_type3 (const line_buf *src_line,
421+ ui32 src_line_offset, line_buf *dst_line,
422+ ui32 bit_depth, bool is_signed, ui32 width)
423+ {
424+ local_gen_irv_convert_to_float<true >(src_line, src_line_offset,
425+ dst_line, bit_depth, is_signed, width);
426+ }
427+
432428 // ////////////////////////////////////////////////////////////////////////
433429 void gen_rct_forward (
434430 const line_buf *r, const line_buf *g, const line_buf *b,
0 commit comments