Generic NLT code is added. Testing is needed.

aous72 · aous72 · commit 7086c360d960 · 2025-01-21T22:18:38.000+11:00
diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp
@@ -273,13 +273,18 @@ namespace ojph {
         }
         else
         {
-          float mul = 1.0f / (float)(1<<num_bits[comp_num]);
-          const si32 *sp = line->i32 + line_offsets[comp_num];
-          float *dp = tc->f32;
-          if (is_signed[comp_num])
-            cnvrt_si32_to_float(sp, dp, mul, comp_width);
-          else
-            cnvrt_si32_to_float_shftd(sp, dp, mul, comp_width);
+          if (nlt_type3[comp_num] == type3)
+            irv_convert_to_float_nlt_type3(line, line_offsets[comp_num],
+              tc, num_bits[comp_num], is_signed[comp_num], comp_width);
+          else {
+            float mul = 1.0f / (float)(1<<num_bits[comp_num]);
+            const si32 *sp = line->i32 + line_offsets[comp_num];
+            float *dp = tc->f32;
+            if (is_signed[comp_num])
+              cnvrt_si32_to_float(sp, dp, mul, comp_width);
+            else
+              cnvrt_si32_to_float_shftd(sp, dp, mul, comp_width);
+          }
         }
         comps[comp_num].push_line();
       }
@@ -311,13 +316,19 @@ namespace ojph {
         }
         else
         {
-          float mul = 1.0f / (float)(1<<num_bits[comp_num]);
-          const si32 *sp = line->i32 + line_offsets[comp_num];
-          float *dp = lines[comp_num].f32;
-          if (is_signed[comp_num])
-            cnvrt_si32_to_float(sp, dp, mul, comp_width);
-          else
-            cnvrt_si32_to_float_shftd(sp, dp, mul, comp_width);
+          if (nlt_type3[comp_num] == type3)
+            irv_convert_to_float_nlt_type3(line, line_offsets[comp_num],
+              lines + comp_num, num_bits[comp_num], is_signed[comp_num], 
+              comp_width);
+          else {
+            float mul = 1.0f / (float)(1<<num_bits[comp_num]);
+            const si32 *sp = line->i32 + line_offsets[comp_num];
+            float *dp = lines[comp_num].f32;
+            if (is_signed[comp_num])
+              cnvrt_si32_to_float(sp, dp, mul, comp_width);
+            else
+              cnvrt_si32_to_float_shftd(sp, dp, mul, comp_width);
+          }
           if (comp_num == 2)
           { // irreversible color transform
             ict_forward(lines[0].f32, lines[1].f32, lines[2].f32,
@@ -364,13 +375,21 @@ namespace ojph {
         }
         else
         {
-          float mul = (float)(1 << num_bits[comp_num]);
-          const float *sp = src_line->f32;
-          si32 *dp = tgt_line->i32 + line_offsets[comp_num];
-          if (is_signed[comp_num])
-            cnvrt_float_to_si32(sp, dp, mul, comp_width);
-          else
-            cnvrt_float_to_si32_shftd(sp, dp, mul, comp_width);
+          if (nlt_type3[comp_num] == type3)
+          {
+            irv_convert_to_integer_nlt_type3(src_line, tgt_line, 
+              line_offsets[comp_num], num_bits[comp_num], 
+              is_signed[comp_num], comp_width);
+          }
+          else {
+            float mul = (float)(1 << num_bits[comp_num]);
+            const float *sp = src_line->f32;
+            si32 *dp = tgt_line->i32 + line_offsets[comp_num];
+            if (is_signed[comp_num])
+              cnvrt_float_to_si32(sp, dp, mul, comp_width);
+            else
+              cnvrt_float_to_si32_shftd(sp, dp, mul, comp_width);
+          }
         }
       }
       else
@@ -407,17 +426,30 @@ namespace ojph {
         }
         else
         {
-          float mul = (float)(1 << num_bits[comp_num]);
-          const float *sp;
-          if (comp_num < 3)
-            sp = lines[comp_num].f32;
-          else
-            sp = comps[comp_num].pull_line()->f32;
-          si32 *dp = tgt_line->i32 + line_offsets[comp_num];
-          if (is_signed[comp_num])
-            cnvrt_float_to_si32(sp, dp, mul, comp_width);
-          else
-            cnvrt_float_to_si32_shftd(sp, dp, mul, comp_width);
+          if (nlt_type3[comp_num] == type3)
+          {
+            line_buf* lbp;
+            if (comp_num < 3)
+              lbp = lines + comp_num;
+            else
+              lbp = comps[comp_num].pull_line();            
+            irv_convert_to_integer_nlt_type3(lbp, tgt_line, 
+              line_offsets[comp_num], num_bits[comp_num], 
+              is_signed[comp_num], comp_width);
+          }
+          else {
+            float mul = (float)(1 << num_bits[comp_num]);
+            const float *sp;
+            if (comp_num < 3)
+              sp = lines[comp_num].f32;
+            else
+              sp = comps[comp_num].pull_line()->f32;
+            si32 *dp = tgt_line->i32 + line_offsets[comp_num];
+            if (is_signed[comp_num])
+              cnvrt_float_to_si32(sp, dp, mul, comp_width);
+            else
+              cnvrt_float_to_si32_shftd(sp, dp, mul, comp_width);
+          }
         }
       }
 
diff --git a/src/core/common/ojph_arch.h b/src/core/common/ojph_arch.h
@@ -271,6 +271,18 @@ namespace ojph {
   #endif
   }
 
+  ////////////////////////////////////////////////////////////////////////////
+  // Float to si64 rounding: bias by +/-0.5 and truncate, or call round().
+  static inline si64 ojph_round64(float val)
+  {
+  #if (defined OJPH_COMPILER_MSVC) || (defined OJPH_COMPILER_GNUC)
+    const float bias = (val >= 0.0f) ? 0.5f : -0.5f; // +0.5 when val >= 0
+    return (si64)(val + bias);
+  #else
+    return (si64)round(val);
+  #endif
+  }
+
   ////////////////////////////////////////////////////////////////////////////
   static inline si32 ojph_trunc(float val)
   {
diff --git a/src/core/common/ojph_mem.h b/src/core/common/ojph_mem.h
@@ -138,8 +138,8 @@ namespace ojph {
     enum : ui32 {
       LFT_UNDEFINED  = 0x00, // Type is undefined/uninitialized
                              // These flags reflects data size in bytes
-      LFT_BYTE       = 0x01, // Set when data is 1 byte
-      LFT_16BIT      = 0x02, // Set when data is 2 bytes
+      LFT_BYTE       = 0x01, // Set when data is 1 byte  (not used)
+      LFT_16BIT      = 0x02, // Set when data is 2 bytes (not used)
       LFT_32BIT      = 0x04, // Set when data is 4 bytes
       LFT_64BIT      = 0x08, // Set when data is 8 bytes
       LFT_REVERSIBLE = 0x10, // Set when data is used for reversible coding
diff --git a/src/core/transform/ojph_colour.cpp b/src/core/transform/ojph_colour.cpp
@@ -78,6 +78,16 @@ namespace ojph {
     void (*cnvrt_float_to_si32)
       (const float *sp, si32 *dp, float mul, ui32 width) = NULL;
 
+    //////////////////////////////////////////////////////////////////////////
+    void (*irv_convert_to_float_nlt_type3) (
+      const line_buf *src_line, ui32 src_line_offset,
+      line_buf *dst_line, ui32 bit_depth, bool is_signed, ui32 width) = NULL;
+      
+    //////////////////////////////////////////////////////////////////////////
+    void (*irv_convert_to_integer_nlt_type3) (
+      const line_buf *src_line, line_buf *dst_line, ui32 dst_line_offset, 
+      ui32 bit_depth, bool is_signed, ui32 width) = NULL;
+
     //////////////////////////////////////////////////////////////////////////
     void (*rct_forward)
       (const line_buf* r, const line_buf* g, const line_buf* b,
@@ -115,6 +125,8 @@ namespace ojph {
       cnvrt_si32_to_float = gen_cnvrt_si32_to_float;
       cnvrt_float_to_si32_shftd = gen_cnvrt_float_to_si32_shftd;
       cnvrt_float_to_si32 = gen_cnvrt_float_to_si32;
+      irv_convert_to_float_nlt_type3 = gen_irv_convert_to_float_nlt_type3;
+      irv_convert_to_integer_nlt_type3 = gen_irv_convert_to_integer_nlt_type3;
       rct_forward = gen_rct_forward;
       rct_backward = gen_rct_backward;
       ict_forward = gen_ict_forward;
@@ -237,8 +249,8 @@ namespace ojph {
       }
       else 
       {
-        assert(src_line->flags | line_buf::LFT_64BIT);
-        assert(dst_line->flags | line_buf::LFT_32BIT);
+        assert(src_line->flags & line_buf::LFT_64BIT);
+        assert(dst_line->flags & line_buf::LFT_32BIT);
         const si64 *sp = src_line->i64 + src_line_offset;
         si32 *dp = dst_line->i32 + dst_line_offset;
         for (ui32 i = width; i > 0; --i)
@@ -276,8 +288,8 @@ namespace ojph {
       }
       else 
       {
-        assert(src_line->flags | line_buf::LFT_64BIT);
-        assert(dst_line->flags | line_buf::LFT_32BIT);
+        assert(src_line->flags & line_buf::LFT_64BIT);
+        assert(dst_line->flags & line_buf::LFT_32BIT);
         const si64 *sp = src_line->i64 + src_line_offset;
         si32 *dp = dst_line->i32 + dst_line_offset;
         for (ui32 i = width; i > 0; --i) {
@@ -319,6 +331,131 @@ namespace ojph {
         *dp++ = ojph_round(*sp++ * mul);
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    // Forward NLT type 3: integer samples -> floats scaled by 2^-bit_depth
+    // (bit_depth in [1, 32]).  Negative signed samples are remapped to
+    // -v - (2^(bit_depth-1) + 1); unsigned ones are offset by -2^(bit_depth-1).
+    void gen_irv_convert_to_float_nlt_type3(const line_buf *src_line, 
+      ui32 src_line_offset, line_buf *dst_line, 
+      ui32 bit_depth, bool is_signed, ui32 width)
+    {
+      assert((src_line->flags & line_buf::LFT_32BIT) &&
+             (src_line->flags & line_buf::LFT_REVERSIBLE) == 0 &&
+             (dst_line->flags & line_buf::LFT_32BIT) &&
+             (dst_line->flags & line_buf::LFT_REVERSIBLE) == 0);
+      assert(bit_depth >= 1 && bit_depth <= 32);
+      // 64-bit shifts keep depths 31 and 32 free of signed overflow
+      const float mul = (float)(1.0 / (double)(1ULL << bit_depth));
+      const si64 half = (si64)(1ULL << (bit_depth - 1));
+      const si32* sp = src_line->i32 + src_line_offset;
+      float* dp = dst_line->f32;
+      if (is_signed)
+      {
+        const si64 shift = half + 1;
+        for (ui32 i = width; i > 0; --i) {
+          si64 v = *sp++;                    // widened: -v cannot overflow
+          v = (v >= 0) ? v : (- v - shift);
+          *dp++ = (float)v * mul;
+        }
+      }
+      else
+      {
+        // the sample is read as unsigned and centred before the conversion
+        for (ui32 i = width; i > 0; --i)
+          *dp++ = (float)((si64)(ui32)*sp++ - half) * mul;
+      }
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    // Inverse NLT type 3 conversion: turns `width` float samples of src_line
+    // into 32-bit integers written at dst_line->i32 + dst_line_offset.
+    //
+    // Each float is multiplied by 2^bit_depth, rounded, and limited to
+    // [-2^(bit_depth-1), 2^(bit_depth-1) - 1]; a NaN is treated like a value
+    // below that range.  When is_signed is true, a negative result v is
+    // then replaced by -v - (2^(bit_depth-1) + 1); otherwise
+    // 2^(bit_depth-1) is added to the result.  These are the steps of
+    // gen_irv_convert_to_float_nlt_type3 in reverse.
+    //
+    //   src_line         float samples, read from src_line->f32
+    //   dst_line         line that receives the integers
+    //   dst_line_offset  index in dst_line->i32 of the first written sample
+    //   bit_depth        bits per sample; must be in [1, 32]
+    //   is_signed        selects the signed or the unsigned mapping
+    //   width            number of samples to convert
+    void gen_irv_convert_to_integer_nlt_type3(const line_buf *src_line, 
+      line_buf *dst_line, ui32 dst_line_offset,
+      ui32 bit_depth, bool is_signed, ui32 width)
+    {
+      assert((src_line->flags & line_buf::LFT_32BIT) &&
+             (src_line->flags & line_buf::LFT_REVERSIBLE) == 0 &&
+             (dst_line->flags & line_buf::LFT_32BIT) &&
+             (dst_line->flags & line_buf::LFT_REVERSIBLE) == 0);
+      assert(bit_depth >= 1 && bit_depth <= 32);
+
+      // Float to integer needs a multiplication by 2^bit_depth; its
+      // reciprocal is the factor of the integer to float direction.  The
+      // shift is 64-bit because 1 << 31 and 1 << 32 do not fit in a 32-bit
+      // signed integer.
+      const float mul = (float)(1ULL << bit_depth);
+
+      // Integer constants are 64-bit: for bit_depth == 32, neither 2^31 + 1
+      // nor the negation of -2^31 fits in si32.
+      const si64 half = (si64)(1ULL << (bit_depth - 1));
+      const si64 shift = half + 1;
+      const si64 upper_limit = half - 1;
+      const si64 lower_limit = -half;
+
+      // The range is tested on the float, before it is converted, because
+      // converting a float that the integer type cannot hold (this includes
+      // infinities and NaNs) is undefined behaviour.  Both bounds are
+      // powers of two and therefore exact in single precision.
+      const float fl_upper = (float)half;  // in range means t <  fl_upper
+      const float fl_lower = -fl_upper;    // in range means t >= fl_lower
+
+      const float* sp = src_line->f32;
+      si32* dp = dst_line->i32 + dst_line_offset;
+
+      if (is_signed)
+      {
+        for (ui32 i = width; i > 0; --i) {
+          const float t = *sp++ * mul;
+          si64 v;
+          if (!(t >= fl_lower))      // below the range, or NaN
+            v = lower_limit;
+          else if (t >= fl_upper)    // above the range
+            v = upper_limit;
+          else {
+            v = ojph_round(t);
+            v = ojph_min(v, upper_limit); // rounding can reach fl_upper
+          }
+          // remap negative values (non-negative ones are stored as they
+          // are); the result stays within [-2^(bit_depth-1), -1]
+          v = (v >= 0) ? v : (- v - shift);
+          *dp++ = (si32)v;
+        }
+      }
+      else
+      {
+        for (ui32 i = width; i > 0; --i) {
+          const float t = *sp++ * mul;
+          si64 v;
+          if (!(t >= fl_lower))      // below the range, or NaN
+            v = lower_limit;
+          else if (t >= fl_upper)    // above the range
+            v = upper_limit;
+          else {
+            v = ojph_round(t);
+            v = ojph_min(v, upper_limit); // rounding can reach fl_upper
+          }
+          // no remapping of negative values here: the opposite conversion
+          // only subtracts half for unsigned components
+          // v + half lies in [0, 2^bit_depth - 1]; the cast through ui32
+          // keeps the bit pattern of values above 2^31 - 1
+          *dp++ = (si32)(ui32)(v + half);
+        }
+      }
+    }
+
     //////////////////////////////////////////////////////////////////////////
     void gen_rct_forward(
       const line_buf *r, const line_buf *g, const line_buf *b,
diff --git a/src/core/transform/ojph_colour.h b/src/core/transform/ojph_colour.h
@@ -77,6 +77,16 @@ namespace ojph {
   extern void (*cnvrt_float_to_si32)
     (const float *sp, si32 *dp, float mul, ui32 width);
 
+  ////////////////////////////////////////////////////////////////////////////
+  extern void (*irv_convert_to_float_nlt_type3) (
+    const line_buf *src_line, ui32 src_line_offset,
+    line_buf *dst_line, ui32 bit_depth, bool is_signed, ui32 width);
+
+  ////////////////////////////////////////////////////////////////////////////
+  extern void (*irv_convert_to_integer_nlt_type3) (
+    const line_buf *src_line, line_buf *dst_line, ui32 dst_line_offset,
+    ui32 bit_depth, bool is_signed, ui32 width);
+
   ////////////////////////////////////////////////////////////////////////////
   extern void (*rct_forward)
     (const line_buf *r, const line_buf *g, const line_buf *b,
diff --git a/src/core/transform/ojph_colour_local.h b/src/core/transform/ojph_colour_local.h
@@ -92,6 +92,16 @@ namespace ojph {
     void gen_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
                                  ui32 width);
 
+    //////////////////////////////////////////////////////////////////////////
+    void gen_irv_convert_to_float_nlt_type3(
+      const line_buf *src_line, ui32 src_line_offset,
+      line_buf *dst_line, ui32 bit_depth, bool is_signed, ui32 width);
+
+    //////////////////////////////////////////////////////////////////////////
+    void gen_irv_convert_to_integer_nlt_type3(
+      const line_buf *src_line, line_buf *dst_line, ui32 dst_line_offset,
+      ui32 bit_depth, bool is_signed, ui32 width);
+
     //////////////////////////////////////////////////////////////////////////
     void gen_rct_forward(
       const line_buf *r, const line_buf *g, const line_buf *b,