Skip to content

Commit f37ae83

Browse files
committed
All wasm code has been done -- needs extensive tests.
1 parent 34e3fc3 commit f37ae83

11 files changed

+1204
-348
lines changed

src/core/codestream/ojph_codeblock_fun.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ namespace ojph {
9797
float delta_inv, ui32 count, ui64* max_val);
9898
void avx2_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
9999
float delta_inv, ui32 count, ui64* max_val);
100+
void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
101+
float delta_inv, ui32 count, ui64* max_val);
100102

101103
//////////////////////////////////////////////////////////////////////////
102104
void gen_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
@@ -122,6 +124,8 @@ namespace ojph {
122124
float delta, ui32 count);
123125
void avx2_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
124126
float delta, ui32 count);
127+
void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
128+
float delta, ui32 count);
125129

126130
void codeblock_fun::init(bool reversible) {
127131

@@ -240,18 +244,31 @@ namespace ojph {
240244
#else // OJPH_ENABLE_WASM_SIMD
241245

242246
// Accelerated functions for WASM SIMD.
243-
decode_cb = ojph_decode_codeblock_wasm;
244-
find_max_val = wasm_find_max_val;
247+
decode_cb32 = ojph_decode_codeblock_wasm;
248+
find_max_val32 = wasm_find_max_val32;
245249
mem_clear = wasm_mem_clear;
246250
if (reversible) {
247-
tx_to_cb = wasm_rev_tx_to_cb;
248-
tx_from_cb = wasm_rev_tx_from_cb;
251+
tx_to_cb32 = wasm_rev_tx_to_cb32;
252+
tx_from_cb32 = wasm_rev_tx_from_cb32;
249253
}
250254
else {
251-
tx_to_cb = wasm_irv_tx_to_cb;
252-
tx_from_cb = wasm_irv_tx_from_cb;
255+
tx_to_cb32 = wasm_irv_tx_to_cb32;
256+
tx_from_cb32 = wasm_irv_tx_from_cb32;
257+
}
258+
encode_cb32 = ojph_encode_codeblock32;
259+
260+
decode_cb64 = ojph_decode_codeblock64;
261+
find_max_val64 = wasm_find_max_val64;
262+
if (reversible) {
263+
tx_to_cb64 = wasm_rev_tx_to_cb64;
264+
tx_from_cb64 = wasm_rev_tx_from_cb64;
253265
}
254-
encode_cb = ojph_encode_codeblock;
266+
else
267+
{
268+
tx_to_cb64 = NULL;
269+
tx_from_cb64 = NULL;
270+
}
271+
encode_cb64 = ojph_encode_codeblock64;
255272

256273
#endif // !OJPH_ENABLE_WASM_SIMD
257274

src/core/codestream/ojph_codestream_wasm.cpp

Lines changed: 83 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
// Date: 15 May 2022
3636
//***************************************************************************/
3737

38+
#include <climits>
3839
#include <cstddef>
3940
#include <wasm_simd128.h>
4041

@@ -43,20 +44,17 @@
4344
namespace ojph {
4445
namespace local {
4546

46-
//////////////////////////////////////////////////////////////////////////
47-
#define REPEAT(a) a,a,a,a
48-
4947
//////////////////////////////////////////////////////////////////////////
5048
void wasm_mem_clear(void* addr, size_t count)
5149
{
5250
float* p = (float*)addr;
53-
v128_t zero = wasm_i32x4_const(REPEAT(0));
51+
v128_t zero = wasm_i32x4_splat(0);
5452
for (size_t i = 0; i < count; i += 16, p += 4)
5553
wasm_v128_store(p, zero);
5654
}
5755

5856
//////////////////////////////////////////////////////////////////////////
59-
ui32 wasm_find_max_val(ui32* address)
57+
ui32 wasm_find_max_val32(ui32* address)
6058
{
6159
v128_t x1, x0 = wasm_v128_load(address);
6260
x1 = wasm_i32x4_shuffle(x0, x0, 2, 3, 2, 3); // x1 = x0[2,3,2,3]
@@ -68,16 +66,26 @@ namespace ojph {
6866
}
6967

7068
//////////////////////////////////////////////////////////////////////////
71-
void wasm_rev_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max,
72-
float delta_inv, ui32 count, ui32* max_val)
69+
// Loads two 64-bit values from `address` and returns their bitwise OR.
// Note that the reduction is an OR of the two lanes, not an arithmetic
// maximum.
ui64 wasm_find_max_val64(ui64* address)
{
  const v128_t lanes = wasm_v128_load(address);
  // broadcast the upper lane: upper = lanes[1,1]
  const v128_t upper = wasm_i64x2_shuffle(lanes, lanes, 1, 1);
  const v128_t merged = wasm_v128_or(lanes, upper);
  // lane 0 now holds lanes[0] | lanes[1]
  return (ui64)wasm_i64x2_extract_lane(merged, 0);
}
77+
78+
//////////////////////////////////////////////////////////////////////////
79+
void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
80+
float delta_inv, ui32 count, ui32* max_val)
7381
{
7482
ojph_unused(delta_inv);
7583

7684
// convert to sign and magnitude and keep max_val
7785
ui32 shift = 31 - K_max;
78-
v128_t m0 = wasm_i32x4_const(REPEAT((int)0x80000000));
79-
v128_t zero = wasm_i32x4_const(REPEAT(0));
80-
v128_t one = wasm_i32x4_const(REPEAT(1));
86+
v128_t m0 = wasm_i32x4_splat(INT_MIN);
87+
v128_t zero = wasm_i32x4_splat(0);
88+
v128_t one = wasm_i32x4_splat(1);
8189
v128_t tmax = wasm_v128_load(max_val);
8290
v128_t *p = (v128_t*)sp;
8391
for (ui32 i = 0; i < count; i += 4, p += 1, dp += 4)
@@ -97,16 +105,16 @@ namespace ojph {
97105
}
98106

99107
//////////////////////////////////////////////////////////////////////////
100-
void wasm_irv_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max,
101-
float delta_inv, ui32 count, ui32* max_val)
108+
void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
109+
float delta_inv, ui32 count, ui32* max_val)
102110
{
103111
ojph_unused(K_max);
104112

105113
//quantize and convert to sign and magnitude and keep max_val
106114

107115
v128_t d = wasm_f32x4_splat(delta_inv);
108-
v128_t zero = wasm_i32x4_const(REPEAT(0));
109-
v128_t one = wasm_i32x4_const(REPEAT(1));
116+
v128_t zero = wasm_i32x4_splat(0);
117+
v128_t one = wasm_i32x4_splat(1);
110118
v128_t tmax = wasm_v128_load(max_val);
111119
float *p = (float*)sp;
112120
for (ui32 i = 0; i < count; i += 4, p += 4, dp += 4)
@@ -127,14 +135,14 @@ namespace ojph {
127135
}
128136

129137
//////////////////////////////////////////////////////////////////////////
130-
void wasm_rev_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max,
131-
float delta, ui32 count)
138+
void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
139+
float delta, ui32 count)
132140
{
133141
ojph_unused(delta);
134142
ui32 shift = 31 - K_max;
135-
v128_t m1 = wasm_i32x4_const(REPEAT(0x7FFFFFFF));
136-
v128_t zero = wasm_i32x4_const(REPEAT(0));
137-
v128_t one = wasm_i32x4_const(REPEAT(1));
143+
v128_t m1 = wasm_i32x4_splat(INT_MAX);
144+
v128_t zero = wasm_i32x4_splat(0);
145+
v128_t one = wasm_i32x4_splat(1);
138146
si32 *p = (si32*)dp;
139147
for (ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
140148
{
@@ -150,11 +158,11 @@ namespace ojph {
150158
}
151159

152160
//////////////////////////////////////////////////////////////////////////
153-
void wasm_irv_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max,
154-
float delta, ui32 count)
161+
void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
162+
float delta, ui32 count)
155163
{
156164
ojph_unused(K_max);
157-
v128_t m1 = wasm_i32x4_const(REPEAT(0x7FFFFFFF));
165+
v128_t m1 = wasm_i32x4_splat(INT_MAX);
158166
v128_t d = wasm_f32x4_splat(delta);
159167
float *p = (float*)dp;
160168
for (ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
@@ -167,6 +175,58 @@ namespace ojph {
167175
valf = wasm_v128_or(valf, sign);
168176
wasm_v128_store(p, valf);
169177
}
170-
}
178+
}
179+
180+
//////////////////////////////////////////////////////////////////////////
181+
void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
182+
float delta_inv, ui32 count, ui64* max_val)
183+
{
184+
ojph_unused(delta_inv);
185+
186+
// convert to sign and magnitude and keep max_val
187+
ui32 shift = 63 - K_max;
188+
v128_t m0 = wasm_i64x2_splat(LLONG_MIN);
189+
v128_t zero = wasm_i64x2_splat(0);
190+
v128_t one = wasm_i64x2_splat(1);
191+
v128_t tmax = wasm_v128_load(max_val);
192+
si64 *p = (si64*)sp;
193+
for (ui32 i = 0; i < count; i += 2, p += 2, dp += 2)
194+
{
195+
v128_t v = wasm_v128_load((v128_t*)sp);
196+
v128_t sign = wasm_i64x2_lt(v, zero);
197+
v128_t val = wasm_v128_xor(v, sign); // negate 1's complement
198+
v128_t ones = wasm_v128_and(sign, one);
199+
val = wasm_i64x2_add(val, ones); // 2's complement
200+
sign = wasm_v128_and(sign, m0);
201+
val = wasm_i64x2_shl(val, shift);
202+
tmax = wasm_v128_or(tmax, val);
203+
val = wasm_v128_or(val, sign);
204+
wasm_v128_store(dp, val);
205+
}
206+
wasm_v128_store(max_val, tmax);
207+
}
208+
209+
//////////////////////////////////////////////////////////////////////////
210+
void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
211+
float delta, ui32 count)
212+
{
213+
ojph_unused(delta);
214+
ui32 shift = 63 - K_max;
215+
v128_t m1 = wasm_i64x2_splat(LLONG_MAX);
216+
v128_t zero = wasm_i64x2_splat(0);
217+
v128_t one = wasm_i64x2_splat(1);
218+
si64 *p = (si64*)dp;
219+
for (ui32 i = 0; i < count; i += 2, sp += 2, p += 2)
220+
{
221+
v128_t v = wasm_v128_load((v128_t*)sp);
222+
v128_t val = wasm_v128_and(v, m1);
223+
val = wasm_i64x2_shr(val, shift);
224+
v128_t sign = wasm_i64x2_lt(v, zero);
225+
val = wasm_v128_xor(val, sign); // negate 1's complement
226+
v128_t ones = wasm_v128_and(sign, one);
227+
val = wasm_i64x2_add(val, ones); // 2's complement
228+
wasm_v128_store(p, val);
229+
}
230+
}
171231
}
172232
}

src/core/codestream/ojph_params_local.h

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,16 @@ namespace ojph {
176176
public:
177177
param_siz()
178178
{
179-
memset(this, 0, sizeof(param_siz));
179+
Lsiz = Csiz = 0;
180+
Xsiz = Ysiz = XOsiz = YOsiz = XTsiz = YTsiz = XTOsiz = YTOsiz = 0;
181+
skipped_resolutions = 0;
182+
memset(store, 0, sizeof(store));
183+
ws_kern_support_needed = dfs_support_needed = false;
184+
cod = NULL;
185+
dfs = NULL;
186+
Rsiz = RSIZ_HT_FLAG;
180187
cptr = store;
181188
old_Csiz = 4;
182-
Rsiz = RSIZ_HT_FLAG;
183189
}
184190

185191
~param_siz()
@@ -882,9 +888,10 @@ namespace ojph {
882888
};
883889

884890
public: // member functions
885-
param_dfs() { memset(this, 0, sizeof(param_dfs)); }
891+
param_dfs() { init(); }
886892
~param_dfs() { if (next) delete next; }
887-
void init() { memset(this, 0, sizeof(param_dfs)); }
893+
void init()
894+
{ Ldfs = Sdfs = Ids = 0; memset(Ddfs, 0, sizeof(Ddfs)); next = NULL; }
888895
bool read(infile_base *file);
889896
bool exists() const { return Ldfs != 0; }
890897

@@ -959,8 +966,17 @@ namespace ojph {
959966
bool read_coefficient(infile_base *file, float &K);
960967
bool read_coefficient(infile_base *file, si16 &K);
961968
void init(bool clear_all = true) {
962-
if (clear_all)
963-
memset(this, 0, sizeof(param_atk));
969+
if (clear_all)
970+
{
971+
Latk = Satk = 0;
972+
Katk = 0.0f;
973+
Natk = 0;
974+
d = NULL;
975+
max_steps = 0;
976+
memset(d_store, 0, sizeof(d_store));
977+
next = NULL;
978+
alloced_next = false;
979+
}
964980
d = d_store; max_steps = sizeof(d_store) / sizeof(lifting_step);
965981
}
966982
void init_irv97();

src/core/transform/ojph_colour.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,9 @@ namespace ojph {
177177
#endif // !OJPH_DISABLE_SIMD
178178

179179
#else // OJPH_ENABLE_WASM_SIMD
180-
cnvrt_si32_to_si32_shftd = wasm_cnvrt_si32_to_si32_shftd;
181-
cnvrt_si32_to_si32_nlt_type3 = wasm_cnvrt_si32_to_si32_nlt_type3;
180+
181+
rev_convert = wasm_rev_convert;
182+
rev_convert_nlt_type3 = wasm_rev_convert_nlt_type3;
182183
cnvrt_si32_to_float_shftd = wasm_cnvrt_si32_to_float_shftd;
183184
cnvrt_si32_to_float = wasm_cnvrt_si32_to_float;
184185
cnvrt_float_to_si32_shftd = wasm_cnvrt_float_to_si32_shftd;
@@ -187,6 +188,7 @@ namespace ojph {
187188
rct_backward = wasm_rct_backward;
188189
ict_forward = wasm_ict_forward;
189190
ict_backward = wasm_ict_backward;
191+
190192
#endif // !OJPH_ENABLE_WASM_SIMD
191193

192194
colour_transform_functions_initialized = true;

src/core/transform/ojph_colour_local.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,16 @@ namespace ojph {
275275
ui32 width);
276276

277277
//////////////////////////////////////////////////////////////////////////
278-
void wasm_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift,
279-
ui32 width);
278+
void wasm_rev_convert(
279+
const line_buf *src_line, const ui32 src_line_offset,
280+
line_buf *dst_line, const ui32 dst_line_offset,
281+
si64 shift, ui32 width);
280282

281283
//////////////////////////////////////////////////////////////////////////
282-
void wasm_cnvrt_si32_to_si32_nlt_type3(const si32 *sp, si32 *dp,
283-
int shift, ui32 width);
284+
void wasm_rev_convert_nlt_type3(
285+
const line_buf *src_line, const ui32 src_line_offset,
286+
line_buf *dst_line, const ui32 dst_line_offset,
287+
si64 shift, ui32 width);
284288

285289
//////////////////////////////////////////////////////////////////////////
286290
void wasm_rct_forward(

0 commit comments

Comments
 (0)