@@ -78,6 +78,16 @@ namespace ojph {
7878 void (*cnvrt_float_to_si32)
7979 (const float *sp, si32 *dp, float mul, ui32 width) = NULL ;
8080
81+ // ////////////////////////////////////////////////////////////////////////
82+ void (*irv_convert_to_float_nlt_type3) (
83+ const line_buf *src_line, ui32 src_line_offset,
84+ line_buf *dst_line, ui32 bit_depth, bool is_signed, ui32 width) = NULL ;
85+
86+ // ////////////////////////////////////////////////////////////////////////
87+ void (*irv_convert_to_integer_nlt_type3) (
88+ const line_buf *src_line, line_buf *dst_line, ui32 dst_line_offset,
89+ ui32 bit_depth, bool is_signed, ui32 width) = NULL ;
90+
8191 // ////////////////////////////////////////////////////////////////////////
8292 void (*rct_forward)
8393 (const line_buf* r, const line_buf* g, const line_buf* b,
@@ -115,6 +125,8 @@ namespace ojph {
115125 cnvrt_si32_to_float = gen_cnvrt_si32_to_float;
116126 cnvrt_float_to_si32_shftd = gen_cnvrt_float_to_si32_shftd;
117127 cnvrt_float_to_si32 = gen_cnvrt_float_to_si32;
128+ irv_convert_to_float_nlt_type3 = gen_irv_convert_to_float_nlt_type3;
129+ irv_convert_to_integer_nlt_type3 = gen_irv_convert_to_integer_nlt_type3;
118130 rct_forward = gen_rct_forward;
119131 rct_backward = gen_rct_backward;
120132 ict_forward = gen_ict_forward;
@@ -237,8 +249,8 @@ namespace ojph {
237249 }
238250 else
239251 {
240- assert (src_line->flags | line_buf::LFT_64BIT);
241- assert (dst_line->flags | line_buf::LFT_32BIT);
252+ assert (src_line->flags & line_buf::LFT_64BIT);
253+ assert (dst_line->flags & line_buf::LFT_32BIT);
242254 const si64 *sp = src_line->i64 + src_line_offset;
243255 si32 *dp = dst_line->i32 + dst_line_offset;
244256 for (ui32 i = width; i > 0 ; --i)
@@ -276,8 +288,8 @@ namespace ojph {
276288 }
277289 else
278290 {
279- assert (src_line->flags | line_buf::LFT_64BIT);
280- assert (dst_line->flags | line_buf::LFT_32BIT);
291+ assert (src_line->flags & line_buf::LFT_64BIT);
292+ assert (dst_line->flags & line_buf::LFT_32BIT);
281293 const si64 *sp = src_line->i64 + src_line_offset;
282294 si32 *dp = dst_line->i32 + dst_line_offset;
283295 for (ui32 i = width; i > 0 ; --i) {
@@ -319,6 +331,131 @@ namespace ojph {
319331 *dp++ = ojph_round (*sp++ * mul);
320332 }
321333
334+ // ////////////////////////////////////////////////////////////////////////
335+ void gen_irv_convert_to_float_nlt_type3 (const line_buf *src_line,
336+ ui32 src_line_offset, line_buf *dst_line,
337+ ui32 bit_depth, bool is_signed, ui32 width)
338+ {
339+ assert ((src_line->flags & line_buf::LFT_32BIT) &&
340+ (src_line->flags & line_buf::LFT_REVERSIBLE) == 0 &&
341+ (dst_line->flags & line_buf::LFT_32BIT) &&
342+ (dst_line->flags & line_buf::LFT_REVERSIBLE) == 0 );
343+
344+ float mul;
345+ if (bit_depth < 32 )
346+ mul = 1 .0f / (float )(1 << bit_depth);
347+ else
348+ mul = (float )(1.0 / 65536.0 / 65536.0 );
349+
350+ const si32* sp = src_line->i32 + src_line_offset;
351+ float * dp = dst_line->f32 ;
352+ if (is_signed)
353+ {
354+ si32 shift = (1 << (bit_depth - 1 )) + 1 ;
355+ for (ui32 i = width; i > 0 ; --i) {
356+ si32 v = *sp++;
357+ v = (v >= 0 ) ? v : (- v - shift);
358+ *dp++ = (float )v * mul;
359+ }
360+ }
361+ else
362+ {
363+ for (ui32 i = width; i > 0 ; --i)
364+ *dp++ = (float )*sp++ * mul - 0 .5f ;
365+ }
366+ }
367+
368+ // ////////////////////////////////////////////////////////////////////////
369+ void gen_irv_convert_to_integer_nlt_type3 (const line_buf *src_line,
370+ line_buf *dst_line, ui32 dst_line_offset,
371+ ui32 bit_depth, bool is_signed, ui32 width)
372+ {
373+ assert ((src_line->flags & line_buf::LFT_32BIT) &&
374+ (src_line->flags & line_buf::LFT_REVERSIBLE) == 0 &&
375+ (dst_line->flags & line_buf::LFT_32BIT) &&
376+ (dst_line->flags & line_buf::LFT_REVERSIBLE) == 0 );
377+
378+ float mul;
379+ if (bit_depth < 32 )
380+ mul = 1 .0f / (float )(1 << bit_depth);
381+ else
382+ mul = (float )(1.0 / 65536.0 / 65536.0 );
383+
384+ const float * sp = src_line->f32 ;
385+ si32* dp = dst_line->i32 + dst_line_offset;
386+ if (bit_depth <= 30 )
387+ {
388+ // We are leaving two bit overhead -- here, we are assuming that after
389+ // multiplications, the resulting number can still be represented
390+ // using 32 bit integer
391+ const si32 half = (1 << (bit_depth - 1 ));
392+ const si32 shift = half + 1 ;
393+ const si32 upper_limit = 0x7FFFFFFF >> (32 - bit_depth);
394+ const si32 lower_limit = 0x80000000 >> (32 - bit_depth);
395+
396+ if (is_signed)
397+ {
398+ for (ui32 i = width; i > 0 ; --i) {
399+ si32 v = ojph_round (*sp++ * mul);
400+ v = ojph_max (v, lower_limit);
401+ v = ojph_min (v, upper_limit);
402+ v = (v >= 0 ) ? v : (- v - shift);
403+ *dp++ = v;
404+ }
405+ }
406+ else
407+ {
408+ for (ui32 i = width; i > 0 ; --i) {
409+ si32 v = ojph_round (*sp++ * mul);
410+ v = ojph_max (v, lower_limit);
411+ v = ojph_min (v, upper_limit);
412+ v = (v >= 0 ) ? v : (- v - shift);
413+ *dp++ = v + half;
414+ }
415+ }
416+ }
417+ else
418+ {
419+ // There is the possibility that converting to integer will
420+ // exceed the dynamic range of 32bit integer; therefore, we need
421+ // to use 64 bit. One may think, why not limit the floats to the
422+ // range of [-0.5f, 0.5f)?
423+ // Notice the half closed range -- we need a value just below 0.5f.
424+ // While getting this number is possible, after multiplication, the
425+ // resulting number will not be exactly the maximum that the integer
426+ // can achieve. All this is academic, because here are talking
427+ // about a number which has all the exponent bits set, meaning
428+ // it is either infinity, -infinity, qNan or sNan.
429+ const si32 half = (1 << (bit_depth - 1 ));
430+ const si32 shift = half + 1 ;
431+ const si64 upper_limit = 0x7FFFFFFFFFFFFFFFLL >> (64 - bit_depth);
432+ const si64 lower_limit = 0x8000000000000000LL >> (64 - bit_depth);
433+
434+ if (is_signed)
435+ {
436+ for (ui32 i = width; i > 0 ; --i) {
437+ si64 t = ojph_round64 (*sp++ * mul);
438+ t = ojph_max (t, lower_limit);
439+ t = ojph_min (t, upper_limit);
440+ si32 v = (si32)t;
441+ v = (v >= 0 ) ? v : (- v - shift);
442+ *dp++ = v;
443+ }
444+ }
445+ else
446+ {
447+ for (ui32 i = width; i > 0 ; --i) {
448+ si64 t = ojph_round64 (*sp++ * mul);
449+ t = ojph_max (t, lower_limit);
450+ t = ojph_min (t, upper_limit);
451+ si32 v = (si32)t;
452+ v = (v >= 0 ) ? v : (- v - shift);
453+ *dp++ = v + half;
454+ }
455+ }
456+ }
457+ }
458+
322459 // ////////////////////////////////////////////////////////////////////////
323460 void gen_rct_forward (
324461 const line_buf *r, const line_buf *g, const line_buf *b,
0 commit comments