|
6 | 6 | #include <cstddef> |
7 | 7 | #include <cstdint> |
8 | 8 | #include <string_view> |
| 9 | +#include <string> |
9 | 10 |
|
10 | 11 | #ifdef __ARM_NEON |
11 | 12 | #include <arm_neon.h> |
|
16 | 17 | #endif |
17 | 18 |
|
18 | 19 | #ifdef __ARM_NEON |
| 20 | + |
// Finds the first occurrence of `needle` inside `haystack` using ARM NEON.
//
// Algorithm adapted from
// http://0x80.pl/notesen/2016-11-28-simd-strfind.html#arm-neon-32-bit-code
// For 16 candidate start positions at a time, the first and the last byte of
// the needle are compared against the haystack; only positions where both
// match are confirmed by comparing the bytes in between.
//
// @param haystack text to search in
// @param needle   text to search for; may be empty or longer than `haystack`
// @return index of the first match, 0 for an empty needle, or
//         std::string::npos when `needle` does not occur in `haystack`
//
// The function never reads outside `haystack`: the vector loop only runs while
// both 16-byte loads fit, and the remaining tail is searched with
// std::string_view::find.
inline size_t neon_strstr_anysize(std::string_view haystack, std::string_view needle) {
    const size_t n = haystack.size();
    const size_t k = needle.size();

    // Same convention as std::string_view::find: an empty needle matches at 0.
    if (k == 0) {
        return 0;
    }
    if (k > n) {
        return std::string::npos;
    }

    // Highest index at which a match can start.
    const size_t last_start = n - k;

    // Bytes strictly between the first and last needle byte (empty for k <= 2).
    const std::string_view middle = k > 2 ? needle.substr(1, k - 2) : std::string_view{};
    const auto middle_matches = [&](size_t pos) {
        return haystack.compare(pos + 1, middle.size(), middle) == 0;
    };

    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(haystack.data());

    const uint8x16_t first = vdupq_n_u8(static_cast<uint8_t>(needle.front()));
    const uint8x16_t last = vdupq_n_u8(static_cast<uint8_t>(needle.back()));
    const uint8x8_t half = vdup_n_u8(0x0f);

    size_t i = 0;

    // The load of the "last byte" block ends at i + k - 1 + 16, which must not
    // exceed n; that is exactly i + 15 <= last_start. It also guarantees that
    // every candidate i + j (j < 16) is a valid start position.
    for (; i + 15 <= last_start; i += 16) {
        const uint8x16_t block_first = vld1q_u8(ptr + i);
        const uint8x16_t block_last = vld1q_u8(ptr + i + k - 1);

        const uint8x16_t eq_first = vceqq_u8(first, block_first);
        const uint8x16_t eq_last = vceqq_u8(last, block_last);
        const uint8x16_t pred_16 = vandq_u8(eq_first, eq_last);

        // Pack the 16 byte-wide predicates into 8 bytes: the low nibble of
        // byte j describes position j, the high nibble position j + 8.
        const uint8x8_t pred_8 = vbsl_u8(half, vget_low_u8(pred_16), vget_high_u8(pred_16));

        if (vget_lane_u64(vreinterpret_u64_u8(pred_8), 0) == 0) {
            continue;
        }

        uint8_t tmp[8];
        vst1_u8(tmp, pred_8);

        // Positions 0..7 are checked before 8..15 so the first match wins.
        for (size_t j = 0; j < 8; ++j) {
            if ((tmp[j] & 0x0f) && middle_matches(i + j)) {
                return i + j;
            }
        }

        for (size_t j = 0; j < 8; ++j) {
            if ((tmp[j] & 0xf0) && middle_matches(i + j + 8)) {
                return i + j + 8;
            }
        }
    }

    // Fewer than 16 candidate positions remain; a full vector load would run
    // past the end of the haystack, so finish with the standard search.
    return haystack.find(needle, i);
}
19 | 74 | inline bool simd_search4(std::string_view haystack, std::string_view needle) { |
20 | 75 | if (haystack.size() < 4) { |
21 | 76 | return false; |
|
0 commit comments