Skip to content

Commit 5e3420f

Browse files
committed
Add NEON implementation for substring search in simd_search.h
1 parent bd9d6ed commit 5e3420f

File tree

1 file changed

+55
-0
lines changed

1 file changed

+55
-0
lines changed

include/simd_search.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <cstddef>
77
#include <cstdint>
88
#include <string_view>
9+
#include <string>
910

1011
#ifdef __ARM_NEON
1112
#include <arm_neon.h>
@@ -16,6 +17,60 @@
1617
#endif
1718

1819
#ifdef __ARM_NEON
20+
21+
// http://0x80.pl/notesen/2016-11-28-simd-strfind.html#arm-neon-32-bit-code
22+
/// Finds the first occurrence of `needle` inside `haystack` using NEON.
///
/// The search filters candidates 16 at a time: a start offset is a candidate
/// when the haystack byte at that offset equals the needle's first byte AND
/// the byte `needle.size() - 1` positions later equals the needle's last
/// byte. Only candidates have their interior bytes compared.
///
/// Vector loads are issued only while both 16-byte windows lie completely
/// inside `haystack`; the remaining start offsets are handled by
/// `std::string_view::find`, so no byte outside the view is ever read and no
/// match extending past the end of the view can be reported.
///
/// @param haystack Text to search in.
/// @param needle   Text to search for.
/// @return Offset of the first match, `0` for an empty needle (same as
///         `std::string_view::find`), or `std::string::npos` if there is no
///         match.
inline size_t neon_strstr_anysize(std::string_view haystack, std::string_view needle) {
    const size_t n = haystack.size();
    const size_t k = needle.size();

    // Degenerate inputs: an empty needle matches at offset 0, and a needle
    // longer than the haystack can never match. Handling them up front also
    // keeps `needle_data[k - 1]` and `n - k` below well defined.
    if (k == 0) {
        return 0;
    }
    if (k > n) {
        return std::string::npos;
    }

    const char* s = haystack.data();
    const char* needle_data = needle.data();
    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(s);

    const uint8x16_t first = vdupq_n_u8(static_cast<uint8_t>(needle_data[0]));
    const uint8x16_t last = vdupq_n_u8(static_cast<uint8_t>(needle_data[k - 1]));
    const uint8x8_t half = vdup_n_u8(0x0f);

    // Number of bytes strictly between the first and last needle byte. For
    // needles of length 1 or 2 the vector filter already proves a full match.
    const size_t inner = k > 2 ? k - 2 : 0;
    const auto inner_matches = [&](size_t pos) {
        return inner == 0 ||
               std::char_traits<char>::compare(s + pos + 1, needle_data + 1, inner) == 0;
    };

    // Start offsets at which the whole needle still fits into the haystack.
    const size_t candidates = n - k + 1;

    size_t i = 0;
    // A block covers start offsets [i, i + 16). With i + 16 <= candidates the
    // furthest byte touched is i + 15 + (k - 1) <= n - 1, i.e. in bounds.
    for (; candidates - i >= 16; i += 16) {
        const uint8x16_t block_first = vld1q_u8(ptr + i);
        const uint8x16_t block_last = vld1q_u8(ptr + i + k - 1);

        const uint8x16_t eq_first = vceqq_u8(first, block_first);
        const uint8x16_t eq_last = vceqq_u8(last, block_last);
        const uint8x16_t pred_16 = vandq_u8(eq_first, eq_last);

        // Pack the 16 byte-wide flags into 8 bytes: the low nibble of lane j
        // reflects offset j, the high nibble reflects offset j + 8.
        const uint8x8_t pred_8 = vbsl_u8(half, vget_low_u8(pred_16), vget_high_u8(pred_16));

        // Cheap rejection of blocks without any candidate.
        if (vget_lane_u64(vreinterpret_u64_u8(pred_8), 0) == 0) {
            continue;
        }

        uint8_t lanes[8];
        vst1_u8(lanes, pred_8);

        // Visit candidates in ascending order so the first match wins.
        for (size_t j = 0; j < 8; ++j) {
            if ((lanes[j] & 0x0f) && inner_matches(i + j)) {
                return i + j;
            }
        }

        for (size_t j = 0; j < 8; ++j) {
            if ((lanes[j] & 0xf0) && inner_matches(i + j + 8)) {
                return i + j + 8;
            }
        }
    }

    // Fewer than 16 start offsets remain: finish with the bounds-checked
    // standard search, which returns npos (== std::string::npos) on failure.
    return haystack.find(needle, i);
}
1974
inline bool simd_search4(std::string_view haystack, std::string_view needle) {
2075
if (haystack.size() < 4) {
2176
return false;

0 commit comments

Comments
 (0)