Skip to content

Commit 1328d76

Browse files
committed
Apply upstream changes
1 parent 7351083 commit 1328d76

File tree

4 files changed

+130
-92
lines changed

4 files changed

+130
-92
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,12 @@ check-clang-format-version:
314314
`clang-format --version` ")" ; exit 1 )
315315

316316
# 10. Feb 23 (Jonas Keller): added format rule
317+
# 10. Jun 24 (rm): this creates new files instead of the linked ones:
318+
# @clang-format -i *.C *.H
317319
.PHONY: format
318320
format: check-clang-format-version
319321
@echo "formatting all .C and .H files"
320-
@clang-format -i *.C *.H
322+
@formatAllHC
321323

322324
# 02. Mar 23 (Jonas Keller): added documentation rule
323325
# 20. Nov 23 (Jonas Keller): added check for doxygen version

SIMDVecBaseImplIntel16.H

Lines changed: 90 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ static SIMD_INLINE Vec<Long, 16> min(const Vec<Long, 16> &a,
11041104
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
11051105
const __m128i diff = _mm_sub_epi64(b, a);
11061106
#if 1 // TODO: check which is faster
1107-
const __m128i res = _mm_xor_si128(
1107+
const __m128i res = _mm_xor_si128(
11081108
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
11091109
#else
11101110
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),
@@ -1209,7 +1209,7 @@ static SIMD_INLINE Vec<Long, 16> max(const Vec<Long, 16> &a,
12091209
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
12101210
const __m128i diff = _mm_sub_epi64(b, a);
12111211
#if 1 // TODO: check which is faster
1212-
const __m128i res = _mm_xor_si128(
1212+
const __m128i res = _mm_xor_si128(
12131213
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
12141214
#else
12151215
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),
@@ -2188,10 +2188,10 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
21882188
const __m128i vInPos = _mm_max_epi8(vIn, _mm_setzero_si128());
21892189
#else
21902190
// from Agner Fog's VCL vectori128.h
2191-
const __m128i signbit = _mm_set1_epi32(0x80808080);
2192-
const __m128i a1 = _mm_xor_si128(vIn, signbit); // add 0x80
2193-
const __m128i m1 = _mm_max_epu8(a1, signbit); // unsigned max
2194-
const __m128i vInPos = _mm_xor_si128(m1, signbit); // sub 0x80
2191+
const __m128i signbit = _mm_set1_epi32(0x80808080);
2192+
const __m128i a1 = _mm_xor_si128(vIn, signbit); // add 0x80
2193+
const __m128i m1 = _mm_max_epu8(a1, signbit); // unsigned max
2194+
const __m128i vInPos = _mm_xor_si128(m1, signbit); // sub 0x80
21952195
#endif
21962196
vOut[0] = _mm_unpacklo_epi8(vInPos, _mm_setzero_si128());
21972197
vOut[1] = _mm_unpackhi_epi8(vInPos, _mm_setzero_si128());
@@ -2212,16 +2212,16 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
22122212
vOut[2] = _mm_cvtepi8_epi32(_mm_srli_si128(vIn, 8));
22132213
vOut[3] = _mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12));
22142214
#else
2215-
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2216-
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2217-
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2218-
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2219-
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2220-
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2221-
vOut[0] = _mm_srai_epi32(lolo16, 24);
2222-
vOut[1] = _mm_srai_epi32(lohi16, 24);
2223-
vOut[2] = _mm_srai_epi32(hilo16, 24);
2224-
vOut[3] = _mm_srai_epi32(hihi16, 24);
2215+
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2216+
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2217+
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2218+
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2219+
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2220+
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2221+
vOut[0] = _mm_srai_epi32(lolo16, 24);
2222+
vOut[1] = _mm_srai_epi32(lohi16, 24);
2223+
vOut[2] = _mm_srai_epi32(hilo16, 24);
2224+
vOut[3] = _mm_srai_epi32(hihi16, 24);
22252225
#endif
22262226
}
22272227

@@ -2234,16 +2234,16 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
22342234
vOut[2] = _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 8)));
22352235
vOut[3] = _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12)));
22362236
#else
2237-
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2238-
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2239-
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2240-
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2241-
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2242-
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2243-
vOut[0] = _mm_cvtepi32_ps(_mm_srai_epi32(lolo16, 24));
2244-
vOut[1] = _mm_cvtepi32_ps(_mm_srai_epi32(lohi16, 24));
2245-
vOut[2] = _mm_cvtepi32_ps(_mm_srai_epi32(hilo16, 24));
2246-
vOut[3] = _mm_cvtepi32_ps(_mm_srai_epi32(hihi16, 24));
2237+
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2238+
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2239+
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2240+
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2241+
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2242+
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2243+
vOut[0] = _mm_cvtepi32_ps(_mm_srai_epi32(lolo16, 24));
2244+
vOut[1] = _mm_cvtepi32_ps(_mm_srai_epi32(lohi16, 24));
2245+
vOut[2] = _mm_cvtepi32_ps(_mm_srai_epi32(hilo16, 24));
2246+
vOut[3] = _mm_cvtepi32_ps(_mm_srai_epi32(hihi16, 24));
22472247
#endif
22482248
}
22492249

@@ -2281,10 +2281,10 @@ static SIMD_INLINE void extend(const Vec<Short, 16> &vIn,
22812281
_mm_srai_epi32(_mm_unpacklo_epi16(_mm_undefined_si128(), vIn), 16);
22822282
const __m128i hi16 =
22832283
_mm_srai_epi32(_mm_unpackhi_epi16(_mm_undefined_si128(), vIn), 16);
2284-
vOut[0] = _mm_cvtepi32_pd(lo16);
2285-
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
2286-
vOut[2] = _mm_cvtepi32_pd(hi16);
2287-
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
2284+
vOut[0] = _mm_cvtepi32_pd(lo16);
2285+
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
2286+
vOut[2] = _mm_cvtepi32_pd(hi16);
2287+
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
22882288
#endif
22892289
}
22902290

@@ -2332,12 +2332,12 @@ static SIMD_INLINE void extend(const Vec<Word, 16> &vIn, Vec<Long, 16> vOut[4])
23322332
vOut[2] = _mm_cvtepu16_epi64(_mm_srli_si128(vIn, 8));
23332333
vOut[3] = _mm_cvtepu16_epi64(_mm_srli_si128(vIn, 12));
23342334
#else
2335-
const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
2336-
const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
2337-
vOut[0] = _mm_unpacklo_epi32(lo16, _mm_setzero_si128());
2338-
vOut[1] = _mm_unpackhi_epi32(lo16, _mm_setzero_si128());
2339-
vOut[2] = _mm_unpacklo_epi32(hi16, _mm_setzero_si128());
2340-
vOut[3] = _mm_unpackhi_epi32(hi16, _mm_setzero_si128());
2335+
const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
2336+
const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
2337+
vOut[0] = _mm_unpacklo_epi32(lo16, _mm_setzero_si128());
2338+
vOut[1] = _mm_unpackhi_epi32(lo16, _mm_setzero_si128());
2339+
vOut[2] = _mm_unpacklo_epi32(hi16, _mm_setzero_si128());
2340+
vOut[3] = _mm_unpackhi_epi32(hi16, _mm_setzero_si128());
23412341
#endif
23422342
}
23432343

@@ -2350,12 +2350,12 @@ static SIMD_INLINE void extend(const Vec<Word, 16> &vIn,
23502350
vOut[2] = _mm_cvtepi32_pd(_mm_cvtepu16_epi32(_mm_srli_si128(vIn, 8)));
23512351
vOut[3] = _mm_cvtepi32_pd(_mm_cvtepu16_epi32(_mm_srli_si128(vIn, 12)));
23522352
#else
2353-
const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
2354-
const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
2355-
vOut[0] = _mm_cvtepi32_pd(lo16);
2356-
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
2357-
vOut[2] = _mm_cvtepi32_pd(hi16);
2358-
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
2353+
const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
2354+
const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
2355+
vOut[0] = _mm_cvtepi32_pd(lo16);
2356+
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
2357+
vOut[2] = _mm_cvtepi32_pd(hi16);
2358+
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
23592359
#endif
23602360
}
23612361

@@ -2416,24 +2416,24 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
24162416
vOut[6] = _mm_cvtepi32_pd(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12)));
24172417
vOut[7] = _mm_cvtepi32_pd(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 14)));
24182418
#else
2419-
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2420-
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2421-
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2422-
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2423-
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2424-
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2425-
const __m128i lolo16ext = _mm_srai_epi32(lolo16, 24);
2426-
const __m128i lohi16ext = _mm_srai_epi32(lohi16, 24);
2427-
const __m128i hilo16ext = _mm_srai_epi32(hilo16, 24);
2428-
const __m128i hihi16ext = _mm_srai_epi32(hihi16, 24);
2429-
vOut[0] = _mm_cvtepi32_pd(lolo16ext);
2430-
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16ext, 8));
2431-
vOut[2] = _mm_cvtepi32_pd(lohi16ext);
2432-
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16ext, 8));
2433-
vOut[4] = _mm_cvtepi32_pd(hilo16ext);
2434-
vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16ext, 8));
2435-
vOut[6] = _mm_cvtepi32_pd(hihi16ext);
2436-
vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16ext, 8));
2419+
const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
2420+
const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
2421+
const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
2422+
const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
2423+
const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
2424+
const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
2425+
const __m128i lolo16ext = _mm_srai_epi32(lolo16, 24);
2426+
const __m128i lohi16ext = _mm_srai_epi32(lohi16, 24);
2427+
const __m128i hilo16ext = _mm_srai_epi32(hilo16, 24);
2428+
const __m128i hihi16ext = _mm_srai_epi32(hihi16, 24);
2429+
vOut[0] = _mm_cvtepi32_pd(lolo16ext);
2430+
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16ext, 8));
2431+
vOut[2] = _mm_cvtepi32_pd(lohi16ext);
2432+
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16ext, 8));
2433+
vOut[4] = _mm_cvtepi32_pd(hilo16ext);
2434+
vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16ext, 8));
2435+
vOut[6] = _mm_cvtepi32_pd(hihi16ext);
2436+
vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16ext, 8));
24372437
#endif
24382438
}
24392439

@@ -2451,20 +2451,20 @@ static SIMD_INLINE void extend(const Vec<Byte, 16> &vIn, Vec<Long, 16> vOut[8])
24512451
vOut[6] = _mm_cvtepu8_epi64(_mm_srli_si128(vIn, 12));
24522452
vOut[7] = _mm_cvtepu8_epi64(_mm_srli_si128(vIn, 14));
24532453
#else
2454-
const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
2455-
const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
2456-
const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
2457-
const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
2458-
const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
2459-
const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
2460-
vOut[0] = _mm_unpacklo_epi32(lolo16, _mm_setzero_si128());
2461-
vOut[1] = _mm_unpackhi_epi32(lolo16, _mm_setzero_si128());
2462-
vOut[2] = _mm_unpacklo_epi32(lohi16, _mm_setzero_si128());
2463-
vOut[3] = _mm_unpackhi_epi32(lohi16, _mm_setzero_si128());
2464-
vOut[4] = _mm_unpacklo_epi32(hilo16, _mm_setzero_si128());
2465-
vOut[5] = _mm_unpackhi_epi32(hilo16, _mm_setzero_si128());
2466-
vOut[6] = _mm_unpacklo_epi32(hihi16, _mm_setzero_si128());
2467-
vOut[7] = _mm_unpackhi_epi32(hihi16, _mm_setzero_si128());
2454+
const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
2455+
const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
2456+
const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
2457+
const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
2458+
const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
2459+
const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
2460+
vOut[0] = _mm_unpacklo_epi32(lolo16, _mm_setzero_si128());
2461+
vOut[1] = _mm_unpackhi_epi32(lolo16, _mm_setzero_si128());
2462+
vOut[2] = _mm_unpacklo_epi32(lohi16, _mm_setzero_si128());
2463+
vOut[3] = _mm_unpackhi_epi32(lohi16, _mm_setzero_si128());
2464+
vOut[4] = _mm_unpacklo_epi32(hilo16, _mm_setzero_si128());
2465+
vOut[5] = _mm_unpackhi_epi32(hilo16, _mm_setzero_si128());
2466+
vOut[6] = _mm_unpacklo_epi32(hihi16, _mm_setzero_si128());
2467+
vOut[7] = _mm_unpackhi_epi32(hihi16, _mm_setzero_si128());
24682468
#endif
24692469
}
24702470

@@ -2481,20 +2481,20 @@ static SIMD_INLINE void extend(const Vec<Byte, 16> &vIn,
24812481
vOut[6] = _mm_cvtepi32_pd(_mm_cvtepu8_epi32(_mm_srli_si128(vIn, 12)));
24822482
vOut[7] = _mm_cvtepi32_pd(_mm_cvtepu8_epi32(_mm_srli_si128(vIn, 14)));
24832483
#else
2484-
const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
2485-
const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
2486-
const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
2487-
const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
2488-
const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
2489-
const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
2490-
vOut[0] = _mm_cvtepi32_pd(lolo16);
2491-
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16, 8));
2492-
vOut[2] = _mm_cvtepi32_pd(lohi16);
2493-
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16, 8));
2494-
vOut[4] = _mm_cvtepi32_pd(hilo16);
2495-
vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16, 8));
2496-
vOut[6] = _mm_cvtepi32_pd(hihi16);
2497-
vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16, 8));
2484+
const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
2485+
const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
2486+
const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
2487+
const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
2488+
const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
2489+
const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
2490+
vOut[0] = _mm_cvtepi32_pd(lolo16);
2491+
vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16, 8));
2492+
vOut[2] = _mm_cvtepi32_pd(lohi16);
2493+
vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16, 8));
2494+
vOut[4] = _mm_cvtepi32_pd(hilo16);
2495+
vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16, 8));
2496+
vOut[6] = _mm_cvtepi32_pd(hihi16);
2497+
vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16, 8));
24982498
#endif
24992499
}
25002500

@@ -3572,7 +3572,7 @@ static SIMD_INLINE Vec<Long, 16> cmplt(const Vec<Long, 16> &a,
35723572
// from Hacker's Delight, 2-12 Comparison Predicates:
35733573
const __m128i diff = _mm_sub_epi64(a, b);
35743574
#if 1 // TODO: check which is faster
3575-
const __m128i res = _mm_xor_si128(
3575+
const __m128i res = _mm_xor_si128(
35763576
diff, _mm_and_si128(_mm_xor_si128(a, b), _mm_xor_si128(diff, a)));
35773577
#else
35783578
const __m128i res = _mm_or_si128(_mm_andnot_si128(b, a),
@@ -3777,7 +3777,7 @@ static SIMD_INLINE Vec<Long, 16> cmpgt(const Vec<Long, 16> &a,
37773777
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
37783778
const __m128i diff = _mm_sub_epi64(b, a);
37793779
#if 1 // TODO: check which is faster
3780-
const __m128i res = _mm_xor_si128(
3780+
const __m128i res = _mm_xor_si128(
37813781
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
37823782
#else
37833783
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),

formatAllHC

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env tcsh
2+
#===========================================================================
3+
#
4+
# formatAllHC --
5+
# run clang-format on all .H and .C files in the directory
6+
#
7+
# This file is part of the following software:
8+
#
9+
# - the low-level C++ template SIMD library
10+
# - the SIMD implementation of the MinWarping and the 2D-Warping methods
11+
# for local visual homing.
12+
#
13+
# The software is provided based on the accompanying license agreement
14+
# in the file LICENSE or LICENSE.doc. The software is provided "as is"
15+
# without any warranty by the licensor and without any liability of the
16+
# licensor, and the software may not be distributed by the licensee; see
17+
# the license agreement for details.
18+
#
19+
# (C) Ralf Möller
20+
# Computer Engineering
21+
# Faculty of Technology
22+
# Bielefeld University
23+
# www.ti.uni-bielefeld.de
24+
#
25+
#===========================================================================
26+
27+
# Run clang-format on every .H and .C file in the current directory.
# Each file is formatted into a temporary copy (tmp_<name>), compared with
# the original, and then copied back over the original.
# NOTE(review): presumably the copy-back (instead of `clang-format -i`) is
# meant to keep linked files in place, as the Makefile note on the format
# rule suggests -- confirm.
foreach src (`ls -1 *.H *.C`)
28+
# echo "formatting $src"
29+
set tmpSrc = tmp_${src}
30+
clang-format $src > $tmpSrc
31+
# report the file only when cmp produced output for original vs. formatted copy
if (`cmp $src $tmpSrc` != "") then
32+
echo "$src was re-formatted"
33+
endif
34+
# NOTE(review): the copy-back below is unconditional; the exit status of
# clang-format is not checked, so a failed run would overwrite $src with
# whatever ended up in $tmpSrc -- confirm this is acceptable.
/bin/cp -f $tmpSrc $src
35+
/bin/rm -f $tmpSrc
36+
end

simdmasktest.C

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,7 @@ void benchmark()
823823
#ifdef BENCH_MASK_RAND2
824824
result = maskz_load(krand2, buffer);
825825
#endif
826-
sum = add(sum, result);
826+
sum = add(sum, result);
827827
}
828828
gettimeofday(&end, nullptr);
829829
print("%i ", sum); // TODO change format according to type T

0 commit comments

Comments
 (0)