3535// Date: 15 May 2022
3636// ***************************************************************************/
3737
38+ #include < climits>
3839#include < cstddef>
3940#include < wasm_simd128.h>
4041
4344namespace ojph {
4445 namespace local {
4546
46- // ////////////////////////////////////////////////////////////////////////
47- #define REPEAT (a ) a,a,a,a
48-
4947 // ////////////////////////////////////////////////////////////////////////
5048 void wasm_mem_clear (void * addr, size_t count)
5149 {
5250 float * p = (float *)addr;
53- v128_t zero = wasm_i32x4_const ( REPEAT ( 0 ) );
51+ v128_t zero = wasm_i32x4_splat ( 0 );
5452 for (size_t i = 0 ; i < count; i += 16 , p += 4 )
5553 wasm_v128_store (p, zero);
5654 }
5755
5856 // ////////////////////////////////////////////////////////////////////////
59- ui32 wasm_find_max_val (ui32* address)
57+ ui32 wasm_find_max_val32 (ui32* address)
6058 {
6159 v128_t x1, x0 = wasm_v128_load (address);
6260 x1 = wasm_i32x4_shuffle (x0, x0, 2 , 3 , 2 , 3 ); // x1 = x0[2,3,2,3]
@@ -68,16 +66,26 @@ namespace ojph {
6866 }
6967
7068 // ////////////////////////////////////////////////////////////////////////
71- void wasm_rev_tx_to_cb (const void *sp, ui32 *dp, ui32 K_max,
72- float delta_inv, ui32 count, ui32* max_val)
69+ ui64 wasm_find_max_val64 (ui64* address)
70+ {
71+ v128_t x1, x0 = wasm_v128_load (address);
72+ x1 = wasm_i64x2_shuffle (x0, x0, 1 , 1 ); // x1 = x0[2,3,2,3]
73+ x0 = wasm_v128_or (x0, x1);
74+ ui64 t = (ui64)wasm_i64x2_extract_lane (x0, 0 );
75+ return t;
76+ }
77+
78+ // ////////////////////////////////////////////////////////////////////////
79+ void wasm_rev_tx_to_cb32 (const void *sp, ui32 *dp, ui32 K_max,
80+ float delta_inv, ui32 count, ui32* max_val)
7381 {
7482 ojph_unused (delta_inv);
7583
7684 // convert to sign and magnitude and keep max_val
7785 ui32 shift = 31 - K_max;
78- v128_t m0 = wasm_i32x4_const ( REPEAT (( int ) 0x80000000 ) );
79- v128_t zero = wasm_i32x4_const ( REPEAT ( 0 ) );
80- v128_t one = wasm_i32x4_const ( REPEAT ( 1 ) );
86+ v128_t m0 = wasm_i32x4_splat (INT_MIN );
87+ v128_t zero = wasm_i32x4_splat ( 0 );
88+ v128_t one = wasm_i32x4_splat ( 1 );
8189 v128_t tmax = wasm_v128_load (max_val);
8290 v128_t *p = (v128_t *)sp;
8391 for (ui32 i = 0 ; i < count; i += 4 , p += 1 , dp += 4 )
@@ -97,16 +105,16 @@ namespace ojph {
97105 }
98106
99107 // ////////////////////////////////////////////////////////////////////////
100- void wasm_irv_tx_to_cb (const void *sp, ui32 *dp, ui32 K_max,
101- float delta_inv, ui32 count, ui32* max_val)
108+ void wasm_irv_tx_to_cb32 (const void *sp, ui32 *dp, ui32 K_max,
109+ float delta_inv, ui32 count, ui32* max_val)
102110 {
103111 ojph_unused (K_max);
104112
105113 // quantize and convert to sign and magnitude and keep max_val
106114
107115 v128_t d = wasm_f32x4_splat (delta_inv);
108- v128_t zero = wasm_i32x4_const ( REPEAT ( 0 ) );
109- v128_t one = wasm_i32x4_const ( REPEAT ( 1 ) );
116+ v128_t zero = wasm_i32x4_splat ( 0 );
117+ v128_t one = wasm_i32x4_splat ( 1 );
110118 v128_t tmax = wasm_v128_load (max_val);
111119 float *p = (float *)sp;
112120 for (ui32 i = 0 ; i < count; i += 4 , p += 4 , dp += 4 )
@@ -127,14 +135,14 @@ namespace ojph {
127135 }
128136
129137 // ////////////////////////////////////////////////////////////////////////
130- void wasm_rev_tx_from_cb (const ui32 *sp, void *dp, ui32 K_max,
131- float delta, ui32 count)
138+ void wasm_rev_tx_from_cb32 (const ui32 *sp, void *dp, ui32 K_max,
139+ float delta, ui32 count)
132140 {
133141 ojph_unused (delta);
134142 ui32 shift = 31 - K_max;
135- v128_t m1 = wasm_i32x4_const ( REPEAT ( 0x7FFFFFFF ) );
136- v128_t zero = wasm_i32x4_const ( REPEAT ( 0 ) );
137- v128_t one = wasm_i32x4_const ( REPEAT ( 1 ) );
143+ v128_t m1 = wasm_i32x4_splat (INT_MAX );
144+ v128_t zero = wasm_i32x4_splat ( 0 );
145+ v128_t one = wasm_i32x4_splat ( 1 );
138146 si32 *p = (si32*)dp;
139147 for (ui32 i = 0 ; i < count; i += 4 , sp += 4 , p += 4 )
140148 {
@@ -150,11 +158,11 @@ namespace ojph {
150158 }
151159
152160 // ////////////////////////////////////////////////////////////////////////
153- void wasm_irv_tx_from_cb (const ui32 *sp, void *dp, ui32 K_max,
154- float delta, ui32 count)
161+ void wasm_irv_tx_from_cb32 (const ui32 *sp, void *dp, ui32 K_max,
162+ float delta, ui32 count)
155163 {
156164 ojph_unused (K_max);
157- v128_t m1 = wasm_i32x4_const ( REPEAT ( 0x7FFFFFFF ) );
165+ v128_t m1 = wasm_i32x4_splat (INT_MAX );
158166 v128_t d = wasm_f32x4_splat (delta);
159167 float *p = (float *)dp;
160168 for (ui32 i = 0 ; i < count; i += 4 , sp += 4 , p += 4 )
@@ -167,6 +175,58 @@ namespace ojph {
167175 valf = wasm_v128_or (valf, sign);
168176 wasm_v128_store (p, valf);
169177 }
170- }
178+ }
179+
180+ // ////////////////////////////////////////////////////////////////////////
181+ void wasm_rev_tx_to_cb64 (const void *sp, ui64 *dp, ui32 K_max,
182+ float delta_inv, ui32 count, ui64* max_val)
183+ {
184+ ojph_unused (delta_inv);
185+
186+ // convert to sign and magnitude and keep max_val
187+ ui32 shift = 63 - K_max;
188+ v128_t m0 = wasm_i64x2_splat (LLONG_MIN);
189+ v128_t zero = wasm_i64x2_splat (0 );
190+ v128_t one = wasm_i64x2_splat (1 );
191+ v128_t tmax = wasm_v128_load (max_val);
192+ si64 *p = (si64*)sp;
193+ for (ui32 i = 0 ; i < count; i += 2 , p += 2 , dp += 2 )
194+ {
195+ v128_t v = wasm_v128_load ((v128_t *)sp);
196+ v128_t sign = wasm_i64x2_lt (v, zero);
197+ v128_t val = wasm_v128_xor (v, sign); // negate 1's complement
198+ v128_t ones = wasm_v128_and (sign, one);
199+ val = wasm_i64x2_add (val, ones); // 2's complement
200+ sign = wasm_v128_and (sign, m0);
201+ val = wasm_i64x2_shl (val, shift);
202+ tmax = wasm_v128_or (tmax, val);
203+ val = wasm_v128_or (val, sign);
204+ wasm_v128_store (dp, val);
205+ }
206+ wasm_v128_store (max_val, tmax);
207+ }
208+
209+ // ////////////////////////////////////////////////////////////////////////
210+ void wasm_rev_tx_from_cb64 (const ui64 *sp, void *dp, ui32 K_max,
211+ float delta, ui32 count)
212+ {
213+ ojph_unused (delta);
214+ ui32 shift = 63 - K_max;
215+ v128_t m1 = wasm_i64x2_splat (LLONG_MAX);
216+ v128_t zero = wasm_i64x2_splat (0 );
217+ v128_t one = wasm_i64x2_splat (1 );
218+ si64 *p = (si64*)dp;
219+ for (ui32 i = 0 ; i < count; i += 2 , sp += 2 , p += 2 )
220+ {
221+ v128_t v = wasm_v128_load ((v128_t *)sp);
222+ v128_t val = wasm_v128_and (v, m1);
223+ val = wasm_i64x2_shr (val, shift);
224+ v128_t sign = wasm_i64x2_lt (v, zero);
225+ val = wasm_v128_xor (val, sign); // negate 1's complement
226+ v128_t ones = wasm_v128_and (sign, one);
227+ val = wasm_i64x2_add (val, ones); // 2's complement
228+ wasm_v128_store (p, val);
229+ }
230+ }
171231 }
172232}
0 commit comments