Skip to content

Commit dee0bc7

Browse files
Merge branch 'aous72:master' into feature/add-32bit-tif-support
2 parents 28b4456 + 5df0f8c commit dee0bc7

File tree

11 files changed

+206
-36
lines changed

11 files changed

+206
-36
lines changed

CMakeLists.txt

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,24 @@ endif()
165165

166166
include(GNUInstallDirs)
167167

168-
install(EXPORT openjph-config
168+
install(EXPORT openjph-targets
169169
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/openjph
170170
)
171171

172-
install(FILES "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc"
173-
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
172+
include(CMakePackageConfigHelpers)
173+
174+
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/src/openjph-config.cmake.in
175+
"${CMAKE_CURRENT_BINARY_DIR}/openjph-config.cmake"
176+
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/openjph
177+
)
178+
179+
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/openjph-config-version.cmake
180+
COMPATIBILITY SameMinorVersion)
181+
182+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/openjph-config.cmake
183+
${CMAKE_CURRENT_BINARY_DIR}/openjph-config-version.cmake
184+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/openjph
185+
)
174186

175187
if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
176188
set(PKG_CONFIG_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}")
@@ -185,21 +197,15 @@ else()
185197
endif()
186198

187199
configure_file(
188-
"${CMAKE_CURRENT_SOURCE_DIR}/src/pkg-config.pc.cmake"
200+
"${CMAKE_CURRENT_SOURCE_DIR}/src/openjph.pc.in"
189201
"${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc"
190202
@ONLY
191203
)
192204

193-
include(CMakePackageConfigHelpers)
194-
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/openjph-config-version.cmake
195-
COMPATIBILITY SameMinorVersion)
196-
197-
install(
198-
FILES ${CMAKE_CURRENT_BINARY_DIR}/openjph-config-version.cmake
199-
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/openjph
205+
install(FILES "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc"
206+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
200207
)
201208

202-
203209
################################################################################################
204210
# Testing (OJPH_BUILD_TESTS)
205211
################################################################################################
@@ -208,4 +214,3 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME AND OJPH_BUILD_TESTS)
208214
enable_testing()
209215
add_subdirectory(tests)
210216
endif()
211-

src/apps/ojph_compress/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,4 @@ add_executable(ojph_compress ${SOURCES})
5050
target_include_directories(ojph_compress PRIVATE ../common)
5151
target_link_libraries(ojph_compress PRIVATE openjph $<TARGET_NAME_IF_EXISTS:TIFF::TIFF>)
5252

53-
install(TARGETS ojph_compress
54-
EXPORT openjph-config
55-
)
53+
install(TARGETS ojph_compress)

src/apps/ojph_expand/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,4 @@ add_executable(ojph_expand ${SOURCES})
5050
target_include_directories(ojph_expand PRIVATE ../common)
5151
target_link_libraries(ojph_expand PRIVATE openjph $<TARGET_NAME_IF_EXISTS:TIFF::TIFF>)
5252

53-
install(TARGETS ojph_expand
54-
EXPORT openjph-config
55-
)
53+
install(TARGETS ojph_expand)

src/apps/ojph_stream_expand/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,4 @@ else()
2323
target_link_libraries(ojph_stream_expand PUBLIC openjph pthread)
2424
endif(MSVC)
2525

26-
install(TARGETS ojph_stream_expand
27-
EXPORT openjph-config
28-
)
26+
install(TARGETS ojph_stream_expand)

src/core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ else()
139139
endif()
140140

141141
install(TARGETS openjph
142-
EXPORT openjph-config
142+
EXPORT openjph-targets
143143
)
144144

145145
install(DIRECTORY common/

src/core/codestream/ojph_codestream_avx2.cpp

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ namespace ojph {
8888
__m256i m0 = _mm256_set1_epi32(INT_MIN);
8989
__m256i tmax = _mm256_loadu_si256((__m256i*)max_val);
9090
__m256i *p = (__m256i*)sp;
91-
for (ui32 i = 0; i < count; i += 8, p += 1, dp += 8)
91+
for ( ; count >= 8; count -= 8, p += 1, dp += 8)
9292
{
9393
__m256i v = _mm256_loadu_si256(p);
9494
__m256i sign = _mm256_and_si256(v, m0);
@@ -98,6 +98,22 @@ namespace ojph {
9898
val = _mm256_or_si256(val, sign);
9999
_mm256_storeu_si256((__m256i*)dp, val);
100100
}
101+
if (count)
102+
{
103+
__m256i v = _mm256_loadu_si256(p);
104+
__m256i sign = _mm256_and_si256(v, m0);
105+
__m256i val = _mm256_abs_epi32(v);
106+
val = _mm256_slli_epi32(val, (int)shift);
107+
108+
__m256i c = _mm256_set1_epi32((si32)count);
109+
__m256i idx = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
110+
__m256i mask = _mm256_cmpgt_epi32(c, idx);
111+
c = _mm256_and_si256(val, mask);
112+
tmax = _mm256_or_si256(tmax, c);
113+
114+
val = _mm256_or_si256(val, sign);
115+
_mm256_storeu_si256((__m256i*)dp, val);
116+
}
101117
_mm256_storeu_si256((__m256i*)max_val, tmax);
102118
}
103119

@@ -113,7 +129,7 @@ namespace ojph {
113129
__m256i tmax = _mm256_loadu_si256((__m256i*)max_val);
114130
float *p = (float*)sp;
115131

116-
for (ui32 i = 0; i < count; i += 8, p += 8, dp += 8)
132+
for ( ; count >= 8; count -= 8, p += 8, dp += 8)
117133
{
118134
__m256 vf = _mm256_loadu_ps(p);
119135
vf = _mm256_mul_ps(vf, d); // multiply
@@ -124,6 +140,23 @@ namespace ojph {
124140
val = _mm256_or_si256(val, sign);
125141
_mm256_storeu_si256((__m256i*)dp, val);
126142
}
143+
if (count)
144+
{
145+
__m256 vf = _mm256_loadu_ps(p);
146+
vf = _mm256_mul_ps(vf, d); // multiply
147+
__m256i val = _mm256_cvtps_epi32(vf); // convert to int
148+
__m256i sign = _mm256_and_si256(val, m0); // get sign
149+
val = _mm256_abs_epi32(val);
150+
151+
__m256i c = _mm256_set1_epi32((si32)count);
152+
__m256i idx = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
153+
__m256i mask = _mm256_cmpgt_epi32(c, idx);
154+
c = _mm256_and_si256(val, mask);
155+
tmax = _mm256_or_si256(tmax, c);
156+
157+
val = _mm256_or_si256(val, sign);
158+
_mm256_storeu_si256((__m256i*)dp, val);
159+
}
127160
_mm256_storeu_si256((__m256i*)max_val, tmax);
128161
}
129162

@@ -178,7 +211,7 @@ namespace ojph {
178211
__m256i one = _mm256_set1_epi64x(1);
179212
__m256i tmax = _mm256_loadu_si256((__m256i*)max_val);
180213
__m256i *p = (__m256i*)sp;
181-
for (ui32 i = 0; i < count; i += 4, p += 1, dp += 4)
214+
for ( ; count >= 4; count -= 4, p += 1, dp += 4)
182215
{
183216
__m256i v = _mm256_loadu_si256(p);
184217
__m256i sign = _mm256_cmpgt_epi64(zero, v);
@@ -191,6 +224,25 @@ namespace ojph {
191224
val = _mm256_or_si256(val, sign);
192225
_mm256_storeu_si256((__m256i*)dp, val);
193226
}
227+
if (count)
228+
{
229+
__m256i v = _mm256_loadu_si256(p);
230+
__m256i sign = _mm256_cmpgt_epi64(zero, v);
231+
__m256i val = _mm256_xor_si256(v, sign); // negate 1's complement
232+
__m256i ones = _mm256_and_si256(sign, one);
233+
val = _mm256_add_epi64(val, ones); // 2's complement
234+
sign = _mm256_and_si256(sign, m0);
235+
val = _mm256_slli_epi64(val, (int)shift);
236+
237+
__m256i c = _mm256_set1_epi64x(count);
238+
__m256i idx = _mm256_set_epi64x(3, 2, 1, 0);
239+
__m256i mask = _mm256_cmpgt_epi64(c, idx);
240+
c = _mm256_and_si256(val, mask);
241+
tmax = _mm256_or_si256(tmax, c);
242+
243+
val = _mm256_or_si256(val, sign);
244+
_mm256_storeu_si256((__m256i*)dp, val);
245+
}
194246
_mm256_storeu_si256((__m256i*)max_val, tmax);
195247
}
196248

src/core/codestream/ojph_codestream_sse2.cpp

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ namespace ojph {
8787
__m128i one = _mm_set1_epi32(1);
8888
__m128i tmax = _mm_loadu_si128((__m128i*)max_val);
8989
__m128i *p = (__m128i*)sp;
90-
for (ui32 i = 0; i < count; i += 4, p += 1, dp += 4)
90+
for ( ; count >= 4; count -= 4, p += 1, dp += 4)
9191
{
9292
__m128i v = _mm_loadu_si128(p);
9393
__m128i sign = _mm_cmplt_epi32(v, zero);
@@ -100,6 +100,25 @@ namespace ojph {
100100
val = _mm_or_si128(val, sign);
101101
_mm_storeu_si128((__m128i*)dp, val);
102102
}
103+
if (count)
104+
{
105+
__m128i v = _mm_loadu_si128(p);
106+
__m128i sign = _mm_cmplt_epi32(v, zero);
107+
__m128i val = _mm_xor_si128(v, sign); // negate 1's complement
108+
__m128i ones = _mm_and_si128(sign, one);
109+
val = _mm_add_epi32(val, ones); // 2's complement
110+
sign = _mm_and_si128(sign, m0);
111+
val = _mm_slli_epi32(val, (int)shift);
112+
113+
__m128i c = _mm_set1_epi32((si32)count);
114+
__m128i idx = _mm_set_epi32(3, 2, 1, 0);
115+
__m128i mask = _mm_cmpgt_epi32(c, idx);
116+
c = _mm_and_si128(val, mask);
117+
tmax = _mm_or_si128(tmax, c);
118+
119+
val = _mm_or_si128(val, sign);
120+
_mm_storeu_si128((__m128i*)dp, val);
121+
}
103122
_mm_storeu_si128((__m128i*)max_val, tmax);
104123
}
105124

@@ -116,7 +135,7 @@ namespace ojph {
116135
__m128i one = _mm_set1_epi32(1);
117136
__m128i tmax = _mm_loadu_si128((__m128i*)max_val);
118137
float *p = (float*)sp;
119-
for (ui32 i = 0; i < count; i += 4, p += 4, dp += 4)
138+
for ( ; count >= 4; count -= 4, p += 4, dp += 4)
120139
{
121140
__m128 vf = _mm_loadu_ps(p);
122141
vf = _mm_mul_ps(vf, d); // multiply
@@ -130,6 +149,26 @@ namespace ojph {
130149
val = _mm_or_si128(val, sign);
131150
_mm_storeu_si128((__m128i*)dp, val);
132151
}
152+
if (count)
153+
{
154+
__m128 vf = _mm_loadu_ps(p);
155+
vf = _mm_mul_ps(vf, d); // multiply
156+
__m128i val = _mm_cvtps_epi32(vf); // convert to int
157+
__m128i sign = _mm_cmplt_epi32(val, zero); // get sign
158+
val = _mm_xor_si128(val, sign); // negate 1's complement
159+
__m128i ones = _mm_and_si128(sign, one);
160+
val = _mm_add_epi32(val, ones); // 2's complement
161+
162+
__m128i c = _mm_set1_epi32((si32)count);
163+
__m128i idx = _mm_set_epi32(3, 2, 1, 0);
164+
__m128i mask = _mm_cmpgt_epi32(c, idx);
165+
c = _mm_and_si128(val, mask);
166+
tmax = _mm_or_si128(tmax, c);
167+
168+
sign = _mm_slli_epi32(sign, 31);
169+
val = _mm_or_si128(val, sign);
170+
_mm_storeu_si128((__m128i*)dp, val);
171+
}
133172
_mm_storeu_si128((__m128i*)max_val, tmax);
134173
}
135174

@@ -189,7 +228,7 @@ namespace ojph {
189228
__m128i one = _mm_set1_epi64x(1);
190229
__m128i tmax = _mm_loadu_si128((__m128i*)max_val);
191230
__m128i *p = (__m128i*)sp;
192-
for (ui32 i = 0; i < count; i += 2, p += 1, dp += 2)
231+
for ( ; count >= 2; count -= 2, p += 1, dp += 2)
193232
{
194233
__m128i v = _mm_loadu_si128(p);
195234
__m128i sign = _mm_cmplt_epi32(v, zero);
@@ -203,6 +242,24 @@ namespace ojph {
203242
val = _mm_or_si128(val, sign);
204243
_mm_storeu_si128((__m128i*)dp, val);
205244
}
245+
if (count)
246+
{
247+
__m128i v = _mm_loadu_si128(p);
248+
__m128i sign = _mm_cmplt_epi32(v, zero);
249+
sign = _mm_shuffle_epi32(sign, 0xF5); // sign = sign[1,1,3,3];
250+
__m128i val = _mm_xor_si128(v, sign); // negate 1's complement
251+
__m128i ones = _mm_and_si128(sign, one);
252+
val = _mm_add_epi64(val, ones); // 2's complement
253+
sign = _mm_and_si128(sign, m0);
254+
val = _mm_slli_epi64(val, (int)shift);
255+
256+
__m128i c = _mm_set_epi32(0, 0, (si32)0xFFFFFFFF, (si32)0xFFFFFFFF);
257+
c = _mm_and_si128(val, c);
258+
tmax = _mm_or_si128(tmax, c);
259+
260+
val = _mm_or_si128(val, sign);
261+
_mm_storeu_si128((__m128i*)dp, val);
262+
}
206263
_mm_storeu_si128((__m128i*)max_val, tmax);
207264
}
208265

@@ -222,10 +279,10 @@ namespace ojph {
222279
__m128i val = _mm_and_si128(v, m1);
223280
val = _mm_srli_epi64(val, (int)shift);
224281
__m128i sign = _mm_cmplt_epi32(v, zero);
225-
sign = _mm_shuffle_epi32(sign, 0xF5); // sign = sign[1,1,3,3];
226-
val = _mm_xor_si128(val, sign); // negate 1's complement
282+
sign = _mm_shuffle_epi32(sign, 0xF5); // sign = sign[1,1,3,3];
283+
val = _mm_xor_si128(val, sign); // negate 1's complement
227284
__m128i ones = _mm_and_si128(sign, one);
228-
val = _mm_add_epi64(val, ones); // 2's complement
285+
val = _mm_add_epi64(val, ones); // 2's complement
229286
_mm_storeu_si128((__m128i*)p, val);
230287
}
231288
}

0 commit comments

Comments
 (0)