Skip to content

Commit 28b4456

Browse files
Merge branch 'aous72:master' into feature/add-32bit-tif-support
2 parents e53fdbe + 967863d commit 28b4456

File tree

12 files changed

+69
-45
lines changed

12 files changed

+69
-45
lines changed

src/apps/ojph_stream_expand/ojph_stream_expand.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ int main(int argc, char* argv[])
251251
OJPH_INFO(0x02000005,
252252
"Error converting source address: %s", err.data());
253253
}
254-
printf("Listining on %s, port %d\n", t, ntohs(server.sin_port));
254+
printf("Listening on %s, port %d\n", t, ntohs(server.sin_port));
255255
}
256256

257257
// process the source IPv4 address and port
@@ -319,7 +319,7 @@ int main(int argc, char* argv[])
319319
OJPH_INFO(0x02000004,
320320
"Error converting source address: %s", err.data());
321321
}
322-
printf("Source mistmatch %s, port %d\n",
322+
printf("Source mismatch %s, port %d\n",
323323
t, ntohs(si_other.sin_port));
324324
continue;
325325
}
@@ -353,7 +353,7 @@ int main(int argc, char* argv[])
353353
ojph::ui32 total_frames = 0, trunc_frames = 0, lost_frames = 0;
354354
frames_handler.get_stats(total_frames, trunc_frames, lost_frames);
355355

356-
printf("Total frame %d, tuncated frames %d, lost frames %d, "
356+
printf("Total frame %d, truncated frames %d, lost frames %d, "
357357
"packets lost %d\n",
358358
total_frames, trunc_frames, lost_frames, lost_packets);
359359
}

src/apps/ojph_stream_expand/stream_expand_support.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ void frames_handler::push(rtp_packet* p)
301301
if (p->get_packet_type() != rtp_packet::PT_BODY)
302302
{ // main packet payload
303303

304-
// The existance of a previous frame means we did not get the marked
304+
// The existence of a previous frame means we did not get the marked
305305
// packet. Here, we close the frame and move it to processing
306306
if (in_use) {
307307
++trunc_frames;

src/apps/ojph_stream_expand/stream_expand_support.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ struct j2k_frame_storer;
6868
///////////////////////////////////////////////////////////////////////////////
6969

7070
/*****************************************************************************/
71-
/** @brief inteprets RTP header and payload, and holds received packets.
71+
/** @brief interprets RTP header and payload, and holds received packets.
7272
*
7373
* This object interprets RFC 3550 and draft-ietf-avtcore-rtp-j2k-scl-00.
7474
* The implementation is not complete, but it is sufficient for the time

src/apps/others/ojph_img_io_avx2.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,14 @@ namespace ojph {
212212
_mm_storeu_si128((__m128i*)(p + 48), _mm256_castsi256_si128(v));
213213
_mm_storeu_si128((__m128i*)(p + 60), _mm256_extracti128_si256(v,1));
214214
_mm_storeu_si128((__m128i*)(p + 72), _mm256_castsi256_si128(w));
215+
#ifdef OJPH_ARCH_X86_64
215216
*((si64*)(p + 84)) = _mm256_extract_epi64(w, 2);
217+
#elif (defined OJPH_ARCH_I386)
218+
*((si32*)(p + 84)) = _mm256_extract_epi32(w, 4);
219+
*((si32*)(p + 88)) = _mm256_extract_epi32(w, 5);
220+
#else
221+
#error Error unsupport compiler
222+
#endif
216223
*((si32*)(p + 92)) = _mm256_extract_epi32(w, 6);
217224

218225
// this is an alternative slower implementation

src/core/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,20 @@ else()
8686
set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2")
8787
set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512")
8888
else()
89+
set_source_files_properties(codestream/ojph_codestream_sse.cpp PROPERTIES COMPILE_FLAGS -msse)
90+
set_source_files_properties(codestream/ojph_codestream_sse2.cpp PROPERTIES COMPILE_FLAGS -msse2)
8991
set_source_files_properties(codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx)
9092
set_source_files_properties(codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
9193
set_source_files_properties(coding/ojph_block_decoder_ssse3.cpp PROPERTIES COMPILE_FLAGS -mssse3)
9294
set_source_files_properties(coding/ojph_block_decoder_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
9395
set_source_files_properties(coding/ojph_block_encoder_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
9496
set_source_files_properties(coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512cd)
97+
set_source_files_properties(transform/ojph_colour_sse.cpp PROPERTIES COMPILE_FLAGS -msse)
98+
set_source_files_properties(transform/ojph_colour_sse2.cpp PROPERTIES COMPILE_FLAGS -msse2)
9599
set_source_files_properties(transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS -mavx)
96100
set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
101+
set_source_files_properties(transform/ojph_transform_sse.cpp PROPERTIES COMPILE_FLAGS -msse)
102+
set_source_files_properties(transform/ojph_transform_sse2.cpp PROPERTIES COMPILE_FLAGS -msse2)
97103
set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx)
98104
set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
99105
set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512f)

src/core/codestream/ojph_codestream_avx2.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <climits>
3939
#include <immintrin.h>
4040
#include "ojph_defs.h"
41+
#include "ojph_arch.h"
4142

4243
namespace ojph {
4344
namespace local {
@@ -64,7 +65,15 @@ namespace ojph {
6465
x0 = _mm_or_si128(x0, x1);
6566
x1 = _mm_shuffle_epi32(x0, 0xEE); // x1 = x0[2,3,2,3]
6667
x0 = _mm_or_si128(x0, x1);
67-
ui64 t = (ui64)_mm_extract_epi64(x0, 0);
68+
ui64 t;
69+
#ifdef OJPH_ARCH_X86_64
70+
t = (ui64)_mm_extract_epi64(x0, 0);
71+
#elif (defined OJPH_ARCH_I386)
72+
t = (ui64)(ui32)_mm_extract_epi32(x0, 0);
73+
t |= (ui64)(ui32)_mm_extract_epi32(x0, 1) << 32;
74+
#else
75+
#error Error unsupport compiler
76+
#endif
6877
return t;
6978
}
7079

src/core/coding/ojph_block_decoder_avx2.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,8 +1700,7 @@ namespace ojph {
17001700
__m128i r = _mm_or_si128(t0, t1);
17011701
r = _mm_shuffle_epi8(r, shuffle_mask);
17021702

1703-
// _mm_storeu_si32 is not defined, so we use this workaround
1704-
_mm_store_ss((float*)dp, _mm_castsi128_ps(r));
1703+
*(ui32*)dp = (ui32)_mm_extract_epi32(r, 0);
17051704
}
17061705
dp[0] = 0; // set an extra entry on the right with 0
17071706
}
@@ -1710,7 +1709,7 @@ namespace ojph {
17101709
ui16* dp = sigma + (y >> 2) * mstr;
17111710
__m128i zero = _mm_setzero_si128();
17121711
for (ui32 x = 0; x < width; x += 32, dp += 8)
1713-
_mm_store_si128((__m128i*)dp, zero);
1712+
_mm_storeu_si128((__m128i*)dp, zero);
17141713
dp[0] = 0; // set an extra entry on the right with 0
17151714
}
17161715
}

src/core/coding/ojph_block_decoder_ssse3.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,8 +1725,8 @@ namespace ojph {
17251725
__m128i r = _mm_or_si128(t0, t1);
17261726
r = _mm_shuffle_epi8(r, shuffle_mask);
17271727

1728-
// _mm_storeu_si32 is not defined, so we use this workaround
1729-
_mm_store_ss((float*)dp, _mm_castsi128_ps(r));
1728+
dp[0] = (ui16)_mm_extract_epi16(r, 0);
1729+
dp[1] = (ui16)_mm_extract_epi16(r, 1);
17301730
}
17311731
dp[0] = 0; // set an extra entry on the right with 0
17321732
}
@@ -1735,7 +1735,7 @@ namespace ojph {
17351735
ui16* dp = sigma + (y >> 2) * mstr;
17361736
__m128i zero = _mm_setzero_si128();
17371737
for (ui32 x = 0; x < width; x += 32, dp += 8)
1738-
_mm_store_si128((__m128i*)dp, zero);
1738+
_mm_storeu_si128((__m128i*)dp, zero);
17391739
dp[0] = 0; // set an extra entry on the right with 0
17401740
}
17411741
}

src/core/coding/ojph_block_encoder_avx2.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -802,9 +802,20 @@ static __m256i proc_cq2(ui32 x, __m256i *cx_val_vec, __m256i &rho_vec,
802802
auto lcxp1_vec = _mm256_permutevar8x32_epi32(cx_val_vec[x], right_shift);
803803
auto tmp = _mm256_permutevar8x32_epi32(lcxp1_vec, right_shift);
804804

805-
tmp = _mm256_insert_epi64(tmp, _mm_cvtsi128_si64(_mm256_castsi256_si128(cx_val_vec[x + 1])), 3);
805+
#ifdef OJPH_ARCH_X86_64
806+
tmp = _mm256_insert_epi64(tmp,
807+
_mm_cvtsi128_si64(_mm256_castsi256_si128(cx_val_vec[x + 1])), 3);
808+
#elif (defined OJPH_ARCH_I386)
809+
int lsb = _mm_cvtsi128_si32(_mm256_castsi256_si128(cx_val_vec[x + 1]));
810+
tmp = _mm256_insert_epi32(tmp, lsb, 6);
811+
int msb = _mm_extract_epi32(_mm256_castsi256_si128(cx_val_vec[x + 1]), 1);
812+
tmp = _mm256_insert_epi32(tmp, msb, 7);
813+
#else
814+
#error Error unsupport compiler
815+
#endif
806816
tmp = _mm256_slli_epi32(tmp, 2);
807-
auto tmp1 = _mm256_insert_epi32(lcxp1_vec, _mm_cvtsi128_si32(_mm256_castsi256_si128(cx_val_vec[x + 1])), 7);
817+
auto tmp1 = _mm256_insert_epi32(lcxp1_vec,
818+
_mm_cvtsi128_si32(_mm256_castsi256_si128(cx_val_vec[x + 1])), 7);
808819
tmp = _mm256_add_epi32(tmp1, tmp);
809820

810821
tmp1 = _mm256_and_si256(rho_vec, _mm256_set1_epi32(4));

src/core/common/ojph_arch.h

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -166,32 +166,6 @@ namespace ojph {
166166
#endif
167167
}
168168

169-
/////////////////////////////////////////////////////////////////////////////
170-
static inline ui32 population_count64(ui64 val)
171-
{
172-
#if defined(OJPH_COMPILER_MSVC) \
173-
&& (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
174-
return (ui32)__popcnt64(val);
175-
#elif (defined OJPH_COMPILER_GNUC)
176-
return (ui32)__builtin_popcountll(val);
177-
#else
178-
const ui64 k1 = 0x5555555555555555ull;
179-
const ui64 k2 = 0x3333333333333333ull;
180-
const ui64 k4 = 0x0F0F0F0F0F0F0F0Full;
181-
const ui64 kf = 0x0101010101010101ull;
182-
183-
// put count of each 2 bits into those 2 bits
184-
val = val - ((val >> 1) & k1);
185-
// put count of each 4 bits into those 4 bits
186-
val = (val & k2) + ((val >> 2) & k2);
187-
// put count of each 8 bits into those 8 bits
188-
val = (val + (val >> 4)) & k4 ;
189-
// returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
190-
val = (val * kf) >> 56;
191-
return (ui32) val;
192-
#endif
193-
}
194-
195169
/////////////////////////////////////////////////////////////////////////////
196170
#ifdef OJPH_COMPILER_MSVC
197171
#pragma intrinsic(_BitScanReverse)
@@ -214,15 +188,33 @@ namespace ojph {
214188
#endif
215189
}
216190

217-
/////////////////////////////////////////////////////////////////////////////
191+
/////////////////////////////////////////////////////////////////////////////
218192
#ifdef OJPH_COMPILER_MSVC
219-
#pragma intrinsic(_BitScanReverse64)
193+
#if (defined OJPH_ARCH_X86_64)
194+
#pragma intrinsic(_BitScanReverse64)
195+
#elif (defined OJPH_ARCH_I386)
196+
#pragma intrinsic(_BitScanReverse)
197+
#else
198+
#error Error unsupport MSVC version
199+
#endif
220200
#endif
221201
static inline ui32 count_leading_zeros(ui64 val)
222202
{
223203
#ifdef OJPH_COMPILER_MSVC
224204
unsigned long result = 0;
225-
_BitScanReverse64(&result, val);
205+
#ifdef OJPH_ARCH_X86_64
206+
_BitScanReverse64(&result, val);
207+
#elif (defined OJPH_ARCH_I386)
208+
ui32 msb = (ui32)(val >> 32), lsb = (ui32)val;
209+
if (msb == 0)
210+
_BitScanReverse(&result, lsb);
211+
else {
212+
_BitScanReverse(&result, msb);
213+
result += 32;
214+
}
215+
#else
216+
#error Error unsupport MSVC version
217+
#endif
226218
return 63 ^ (ui32)result;
227219
#elif (defined OJPH_COMPILER_GNUC)
228220
return (ui32)__builtin_clzll(val);

0 commit comments

Comments
 (0)