|
6 | 6 |
|
/*
 * Multiply two elements of GF(2^128) and reduce the product modulo
 * x^128 + x^7 + x^2 + x + 1.
 *
 * Bit i of an operand (bit 0 being the least significant bit of the low
 * 64-bit half) is the coefficient of x^i; the result uses the same layout.
 * No bit reflection is applied here.
 *
 * a, b:    the two 128-bit field elements.
 * returns: the 128-bit reduced product a * b.
 *
 * The 256-bit carry-less product is built from three PCLMULQDQ operations
 * (Karatsuba) and then folded back to 128 bits with shifts and XORs.
 */
__m128i gf128_mul(__m128i a, __m128i b)
{
	/* All-ones in the lowest 32-bit lane only. */
	const __m128i low_lane = _mm_setr_epi32(-1, 0, 0, 0);

	/* Karatsuba partial products: lo = a0*b0, hi = a1*b1. */
	__m128i lo = _mm_clmulepi64_si128(a, b, 0x00);
	__m128i hi = _mm_clmulepi64_si128(a, b, 0x11);

	/* 0x4e swaps the 64-bit halves, so the low half becomes a0 ^ a1. */
	__m128i a_fold = _mm_xor_si128(_mm_shuffle_epi32(a, 0x4e), a);
	__m128i b_fold = _mm_xor_si128(_mm_shuffle_epi32(b, 0x4e), b);

	/* mid = (a0^a1)*(b0^b1) ^ lo ^ hi = a0*b1 ^ a1*b0. */
	__m128i mid = _mm_clmulepi64_si128(a_fold, b_fold, 0x00);
	mid = _mm_xor_si128(mid, lo);
	mid = _mm_xor_si128(mid, hi);

	/* mid is weighted by x^64: it straddles the two 128-bit halves. */
	lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
	hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));

	/*
	 * Reduction: hi * x^128 == hi * (x^7 + x^2 + x + 1).  The shifts are
	 * done per 32-bit lane, so first collect the bits that the <<1, <<2
	 * and <<7 shifts push out of the top of each lane.
	 */
	__m128i carry = _mm_srli_epi32(hi, 31);
	carry = _mm_xor_si128(carry, _mm_srli_epi32(hi, 30));
	carry = _mm_xor_si128(carry, _mm_srli_epi32(hi, 25));

	/* 147 (0x93) rotates the lanes up by one; the top lane wraps to lane 0. */
	carry = _mm_shuffle_epi32(carry, 147);

	/*
	 * Carries between lanes land in the low half of the result.  The carry
	 * that wrapped around overflowed 128 bits, so it is folded back into hi
	 * to be reduced together with it.
	 */
	lo = _mm_xor_si128(lo, _mm_andnot_si128(low_lane, carry));
	hi = _mm_xor_si128(hi, _mm_and_si128(low_lane, carry));

	/* lo ^= hi*x ^ hi*x^2 ^ hi*x^7 (bit shifts inside each 32-bit lane). */
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 1));
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 2));
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 7));

	/* Final term: hi * 1. */
	return (_mm_xor_si128(lo, hi));
}
0 commit comments