Skip to content

Commit fd295de

Browse files
committed
Bug fixes, some historic.
1 parent 3527f0a commit fd295de

File tree

2 files changed

+51
-31
lines changed

2 files changed

+51
-31
lines changed

src/core/codestream/ojph_codeblock_fun.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ namespace ojph {
276276
tx_from_cb64 = NULL;
277277
}
278278
encode_cb64 = ojph_encode_codeblock64;
279+
bool result = initialize_block_encoder_tables();
280+
assert(result); ojph_unused(result);
279281

280282
#endif // !OJPH_ENABLE_WASM_SIMD
281283

src/core/transform/ojph_colour_wasm.cpp

Lines changed: 49 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,20 @@
4747
namespace ojph {
4848
namespace local {
4949

50+
//////////////////////////////////////////////////////////////////////////
51+
static inline
52+
v128_t ojph_convert_float_to_i32(v128_t a, v128_t zero, v128_t half)
53+
{ // We implement ojph_round, which is
54+
// val + (val >= 0.0f ? 0.5f : -0.5f), where val is float
55+
v128_t c = wasm_f32x4_ge(a, zero); // greater or equal to zero
56+
v128_t p = wasm_f32x4_add(a, half); // for positive, add half
57+
v128_t n = wasm_f32x4_sub(a, half); // for negative, subtract half
58+
v128_t d = wasm_v128_and(c, p); // keep positive only
59+
v128_t e = wasm_v128_andnot(n, c); // keep negative only
60+
v128_t v = wasm_v128_or(d, e); // combine
61+
return wasm_i32x4_trunc_sat_f32x4(v);// truncate (towards 0)
62+
}
63+
5064
//////////////////////////////////////////////////////////////////////////
5165
void wasm_rev_convert(const line_buf *src_line,
5266
const ui32 src_line_offset,
@@ -129,7 +143,7 @@ namespace ojph {
129143
v128_t c = wasm_i32x4_lt(s, zero); // 0xFFFFFFFF for -ve value
130144
v128_t v_m_sh = wasm_i32x4_sub(sh, s); // - shift - value
131145
v_m_sh = wasm_v128_and(c, v_m_sh); // keep only - shift - value
132-
s = wasm_v128_andnot(c, s); // keep only +ve or 0
146+
s = wasm_v128_andnot(s, c); // keep only +ve or 0
133147
s = wasm_v128_or(s, v_m_sh); // combine
134148
wasm_v128_store(dp, s);
135149
}
@@ -149,7 +163,7 @@ namespace ojph {
149163
c = wasm_i64x2_lt(u, zero); // 64b -1 for -ve value
150164
v_m_sh = wasm_i64x2_sub(sh, u); // - shift - value
151165
v_m_sh = wasm_v128_and(c, v_m_sh); // keep only - shift - value
152-
u = wasm_v128_andnot(c, u); // keep only +ve or 0
166+
u = wasm_v128_andnot(u, c); // keep only +ve or 0
153167
u = wasm_v128_or(u, v_m_sh); // combine
154168

155169
wasm_v128_store(dp, u);
@@ -158,7 +172,7 @@ namespace ojph {
158172
c = wasm_i64x2_lt(u, zero); // 64b -1 for -ve value
159173
v_m_sh = wasm_i64x2_sub(sh, u); // - shift - value
160174
v_m_sh = wasm_v128_and(c, v_m_sh); // keep only - shift - value
161-
u = wasm_v128_andnot(c, u); // keep only +ve or 0
175+
u = wasm_v128_andnot(u, c); // keep only +ve or 0
162176
u = wasm_v128_or(u, v_m_sh); // combine
163177

164178
wasm_v128_store(dp + 2, u);
@@ -182,14 +196,14 @@ namespace ojph {
182196
m = wasm_i64x2_lt(s, zero); // 64b -1 for -ve value
183197
tm = wasm_i64x2_sub(sh, s); // - shift - value
184198
n = wasm_v128_and(m, tm); // -ve
185-
p = wasm_v128_andnot(m, s); // +ve
199+
p = wasm_v128_andnot(s, m); // +ve
186200
t0 = wasm_v128_or(n, p);
187201

188202
s = wasm_v128_load(sp + 2);
189203
m = wasm_i64x2_lt(s, zero); // 64b -1 for -ve value
190204
tm = wasm_i64x2_sub(sh, s); // - shift - value
191205
n = wasm_v128_and(m, tm); // -ve
192-
p = wasm_v128_andnot(m, s); // +ve
206+
p = wasm_v128_andnot(s, m); // +ve
193207
t1 = wasm_v128_or(n, p);
194208

195209
t0 = wasm_i32x4_shuffle(t0, t1, 0, 2, 4 + 0, 4 + 2);
@@ -232,32 +246,32 @@ namespace ojph {
232246
void wasm_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul,
233247
ui32 width)
234248
{
235-
// rounding mode is always set to _MM_ROUND_NEAREST
236-
v128_t shift = wasm_f32x4_splat(0.5f);
249+
const v128_t zero = wasm_f32x4_splat(0.0f);
250+
const v128_t half = wasm_f32x4_splat(0.5f);
237251
v128_t m = wasm_f32x4_splat(mul);
238252
for (int i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
239253
{
240254
v128_t t = wasm_v128_load(sp);
241-
v128_t s = wasm_f32x4_add(t, shift);
255+
v128_t s = wasm_f32x4_add(t, half);
242256
s = wasm_f32x4_mul(s, m);
243-
s = wasm_f32x4_add(s, shift); // + 0.5 and followed by floor next
244-
wasm_v128_store(dp, wasm_i32x4_trunc_sat_f32x4(s));
257+
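s = wasm_f32x4_add(s, half); // adds 0.5; NOTE(review): the conversion on the next statement no longer floors -- it applies its own +/-0.5 offset and then truncates, so confirm this extra 0.5 is still intended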
258+
wasm_v128_store(dp, ojph_convert_float_to_i32(s, zero, half));
245259
}
246260
}
247261

248262
//////////////////////////////////////////////////////////////////////////
249263
void wasm_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
250264
ui32 width)
251265
{
252-
// rounding mode is always set to _MM_ROUND_NEAREST
253-
v128_t shift = wasm_f32x4_splat(0.5f);
266+
const v128_t zero = wasm_f32x4_splat(0.0f);
267+
const v128_t half = wasm_f32x4_splat(0.5f);
254268
v128_t m = wasm_f32x4_splat(mul);
255269
for (int i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
256270
{
257271
v128_t t = wasm_v128_load(sp);
258272
v128_t s = wasm_f32x4_mul(t, m);
259-
s = wasm_f32x4_add(s, shift); // + 0.5 and followed by floor next
260-
wasm_v128_store(dp, wasm_i32x4_trunc_sat_f32x4(s));
273+
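s = wasm_f32x4_add(s, half); // adds 0.5; NOTE(review): the conversion on the next statement no longer floors -- it applies its own +/-0.5 offset and then truncates, so confirm this extra 0.5 is still intended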
274+
wasm_v128_store(dp, ojph_convert_float_to_i32(s, zero, half));
261275
}
262276
}
263277

@@ -267,7 +281,7 @@ namespace ojph {
267281
{
268282
v128_t c = wasm_i32x4_ge(x, y); // 0xFFFFFFFF for x >= y
269283
v128_t d = wasm_v128_and(c, a); // keep only a, where x >= y
270-
v128_t e = wasm_v128_andnot(c, b); // keep only b, where x < y
284+
v128_t e = wasm_v128_andnot(b, c); // keep only b, where x < y
271285
return wasm_v128_or(d, e); // combine
272286
}
273287

@@ -277,7 +291,7 @@ namespace ojph {
277291
{
278292
v128_t c = wasm_i32x4_lt(x, y); // 0xFFFFFFFF for x < y
279293
v128_t d = wasm_v128_and(c, a); // keep only a, where x < y
280-
v128_t e = wasm_v128_andnot(c, b); // keep only b, where x >= y
294+
v128_t e = wasm_v128_andnot(b, c); // keep only b, where x >= y
281295
return wasm_v128_or(d, e); // combine
282296
}
283297

@@ -291,8 +305,6 @@ namespace ojph {
291305
(dst_line->flags & line_buf::LFT_32BIT) &&
292306
(dst_line->flags & line_buf::LFT_INTEGER));
293307

294-
// rounding mode is always set to _MM_ROUND_NEAREST
295-
296308
const float* sp = src_line->f32;
297309
si32* dp = dst_line->i32 + dst_line_offset;
298310
if (bit_depth <= 30)
@@ -306,34 +318,37 @@ namespace ojph {
306318

307319
if (is_signed)
308320
{
309-
v128_t zero = wasm_i32x4_splat(0);
321+
const v128_t zero = wasm_f32x4_splat(0.0f);
322+
const v128_t half = wasm_f32x4_splat(0.5f);
310323
v128_t bias = wasm_i32x4_splat(-((1 << (bit_depth - 1)) + 1));
311324
for (ui32 i = width; i > 0; i -= 4, sp += 4, dp += 4)
312325
{
313326
v128_t t = wasm_v128_load(sp);
314327
t = wasm_f32x4_mul(t, mul);
315-
v128_t u = wasm_i32x4_trunc_sat_f32x4(t);
328+
v128_t u = ojph_convert_float_to_i32(t, zero, half);
316329
u = wasm_i32x4_max(u, lower_limit);
317330
u = wasm_i32x4_min(u, upper_limit);
318331

319332
v128_t c = wasm_i32x4_gt(zero, u); //0xFFFFFFFF for -ve value
320333
v128_t neg = wasm_i32x4_sub(bias, u); //-bias -value
321334
neg = wasm_v128_and(c, neg); //keep only - bias - value
322-
v128_t v = wasm_v128_andnot(c, u); //keep only +ve or 0
335+
v128_t v = wasm_v128_andnot(u, c); //keep only +ve or 0
323336
v = wasm_v128_or(neg, v); //combine
324337
wasm_v128_store(dp, v);
325338
}
326339
}
327340
else
328341
{
329-
v128_t half = wasm_i32x4_splat(-(1 << (bit_depth - 1)));
342+
const v128_t zero = wasm_f32x4_splat(0.0f);
343+
const v128_t half = wasm_f32x4_splat(0.5f);
344+
v128_t ihalf = wasm_i32x4_splat(-(1 << (bit_depth - 1)));
330345
for (ui32 i = width; i > 0; i -= 4, sp += 4, dp += 4) {
331346
v128_t t = wasm_v128_load(sp);
332347
t = wasm_f32x4_mul(t, mul);
333-
v128_t u = wasm_i32x4_trunc_sat_f32x4(t);
348+
v128_t u = ojph_convert_float_to_i32(t, zero, half);
334349
u = wasm_i32x4_max(u, lower_limit);
335350
u = wasm_i32x4_min(u, upper_limit);
336-
u = wasm_i32x4_add(u, half);
351+
u = wasm_i32x4_add(u, ihalf);
337352
wasm_v128_store(dp, u);
338353
}
339354
}
@@ -359,32 +374,35 @@ namespace ojph {
359374

360375
if (is_signed)
361376
{
362-
v128_t zero = wasm_i32x4_splat(0);
377+
const v128_t zero = wasm_f32x4_splat(0.0f);
378+
const v128_t half = wasm_f32x4_splat(0.5f);
363379
v128_t bias = wasm_i32x4_splat(-((1 << (bit_depth - 1)) + 1));
364380
for (ui32 i = width; i > 0; i -= 4, sp += 4, dp += 4) {
365381
v128_t t = wasm_v128_load(sp);
366382
t = wasm_f32x4_mul(t, mul);
367-
v128_t u = wasm_i32x4_trunc_sat_f32x4(t);
383+
v128_t u = ojph_convert_float_to_i32(t, zero, half);
368384
u = ojph_wasm_i32x4_max_ge(u, s32_lower_limit, t, fl_lower_limit);
369385
u = ojph_wasm_i32x4_min_lt(u, s32_upper_limit, t, fl_upper_limit);
370386
v128_t c = wasm_i32x4_gt(zero, u); //0xFFFFFFFF for -ve value
371387
v128_t neg = wasm_i32x4_sub(bias, u); //-bias -value
372388
neg = wasm_v128_and(c, neg); //keep only - bias - value
373-
v128_t v = wasm_v128_andnot(c, u); //keep only +ve or 0
389+
v128_t v = wasm_v128_andnot(u, c); //keep only +ve or 0
374390
v = wasm_v128_or(neg, v); //combine
375391
wasm_v128_store(dp, v);
376392
}
377393
}
378394
else
379395
{
380-
v128_t half = wasm_i32x4_splat(-(1 << (bit_depth - 1)));
396+
const v128_t zero = wasm_f32x4_splat(0.0f);
397+
const v128_t half = wasm_f32x4_splat(0.5f);
398+
v128_t ihalf = wasm_i32x4_splat(-(1 << (bit_depth - 1)));
381399
for (ui32 i = width; i > 0; i -= 4, sp += 4, dp += 4) {
382400
v128_t t = wasm_v128_load(sp);
383401
t = wasm_f32x4_mul(t, mul);
384-
v128_t u = wasm_i32x4_trunc_sat_f32x4(t);
402+
v128_t u = ojph_convert_float_to_i32(t, zero, half);
385403
u = ojph_wasm_i32x4_max_ge(u, s32_lower_limit, t, fl_lower_limit);
386404
u = ojph_wasm_i32x4_min_lt(u, s32_upper_limit, t, fl_upper_limit);
387-
u = wasm_i32x4_add(u, half);
405+
u = wasm_i32x4_add(u, ihalf);
388406
wasm_v128_store(dp, u);
389407
}
390408
}
@@ -416,7 +434,7 @@ namespace ojph {
416434
v128_t c = wasm_i32x4_lt(u, zero); // 0xFFFFFFFF for -ve value
417435
v128_t neg = wasm_i32x4_sub(bias, u); // - bias - value
418436
neg = wasm_v128_and(c, neg); // keep only - bias - value
419-
t = wasm_v128_andnot(c, u); // keep only +ve or 0
437+
t = wasm_v128_andnot(u, c); // keep only +ve or 0
420438
u = wasm_v128_or(neg, t); // combine
421439
v128_t v = wasm_f32x4_convert_i32x4(u);
422440
v = wasm_f32x4_mul(v, mul);

0 commit comments

Comments
 (0)