|
| 1 | +#include <emmintrin.h> |
| 2 | +#include <immintrin.h> |
| 3 | +#include <stdint.h> |
| 4 | +#include <tmmintrin.h> |
| 5 | +#include <xmmintrin.h> |
| 6 | + |
/*
 * gfmul - multiply two 16-byte blocks in GF(2^128) using PCLMULQDQ.
 *
 * The sequence follows the gfmul listing in Intel's carry-less
 * multiplication white paper for GCM (field polynomial
 * x^128 + x^7 + x^2 + x + 1, bit-reflected representation).
 *
 * a, b: 16-byte input blocks, read with unaligned loads.
 * r:    16-byte output block, written with an unaligned store.  Both
 *       inputs are loaded before the store, so r may alias a or b.
 *
 * The code needs SSE2, SSSE3 (_mm_shuffle_epi8) and PCLMUL at run time.
 * On GCC/Clang the attribute below enables those ISA extensions for this
 * function only, so the file also builds without -mssse3 -mpclmul.
 */
#if defined(__GNUC__)
__attribute__((target("ssse3,pclmul")))
#endif
void gfmul(const uint8_t a[0x10], const uint8_t b[0x10], uint8_t r[0x10])
{
    /* Shuffle control that reverses the 16 bytes of a register. */
    const __m128i BSWAP = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

    const __m128i x = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)a), BSWAP);
    const __m128i y = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)b), BSWAP);

    /*
     * Schoolbook 128x128 -> 256 bit carry-less product from four 64x64
     * partial products.  The immediate selects the 64-bit halves:
     * bit 0 picks the half of x, bit 4 picks the half of y.
     */
    __m128i lo = _mm_clmulepi64_si128(x, y, 0x00);
    __m128i mid = _mm_xor_si128(_mm_clmulepi64_si128(x, y, 0x10),
                                _mm_clmulepi64_si128(x, y, 0x01));
    __m128i hi = _mm_clmulepi64_si128(x, y, 0x11);

    /* The middle term straddles the two halves of the 256-bit result. */
    lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
    hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));

    /*
     * Shift the 256-bit value [hi:lo] left by one bit.  SSE has no
     * 128-bit bit shift, so each 32-bit lane is shifted and the bit that
     * falls out of a lane is carried into the next lane by a byte shift.
     */
    __m128i carry_lo = _mm_srli_epi32(lo, 31);
    __m128i carry_hi = _mm_srli_epi32(hi, 31);
    lo = _mm_slli_epi32(lo, 1);
    hi = _mm_slli_epi32(hi, 1);

    /* Top bit of lo becomes bit 0 of hi. */
    const __m128i cross = _mm_srli_si128(carry_lo, 12);
    carry_hi = _mm_slli_si128(carry_hi, 4);
    carry_lo = _mm_slli_si128(carry_lo, 4);
    lo = _mm_or_si128(lo, carry_lo);
    hi = _mm_or_si128(hi, carry_hi);
    hi = _mm_or_si128(hi, cross);

    /* Reduction, first phase: fold lo by left shifts of 31, 30 and 25. */
    __m128i fold = _mm_xor_si128(_mm_slli_epi32(lo, 31), _mm_slli_epi32(lo, 30));
    fold = _mm_xor_si128(fold, _mm_slli_epi32(lo, 25));
    /* Keep the lanes that the 12-byte shift below pushes out of lo. */
    const __m128i spill = _mm_srli_si128(fold, 4);
    fold = _mm_slli_si128(fold, 12);
    lo = _mm_xor_si128(lo, fold);

    /* Reduction, second phase: right shifts of 1, 2 and 7, plus the spill. */
    __m128i red = _mm_xor_si128(_mm_srli_epi32(lo, 1), _mm_srli_epi32(lo, 2));
    red = _mm_xor_si128(red, _mm_srli_epi32(lo, 7));
    red = _mm_xor_si128(red, spill);
    lo = _mm_xor_si128(lo, red);
    hi = _mm_xor_si128(hi, lo);

    /* Undo the byte reversal applied to the inputs and store the result. */
    _mm_storeu_si128((__m128i*)r, _mm_shuffle_epi8(hi, BSWAP));
}
| 65 | + |
| 66 | + |
| 67 | +#include <stdio.h> |
| 68 | +#include <string.h> |
int main(int argc, char* argv[])
{
    /* Leading bytes the product is expected to start with: 020BBEB352AEAE16... */
    static const uint8_t expected_prefix[8] = {
        0x02, 0x0B, 0xBE, 0xB3, 0x52, 0xAE, 0xAE, 0x16
    };

    uint8_t a[16];
    uint8_t b[16];
    uint8_t r[16];
    int i;
    int success;

    (void)argc;
    (void)argv;

    /*
     * Test vectors: a counts down 0x1f..0x10 (every high nibble is 1),
     * b counts down 0x2f..0x20 (every high nibble is 2).
     */
    for (i = 0; i < 16; i++) {
        a[i] = (uint8_t)(0x1f - i);
        b[i] = (uint8_t)(0x2f - i);
    }

    gfmul(a, b, r);

    /* Dump all 16 result bytes as upper-case hex on one line. */
    printf("GHASH of message: ");
    for (i = 0; i < 16; i++) {
        printf("%02X", (unsigned)r[i]);
    }
    printf("\n");

    /* Only the first eight bytes are compared against the reference. */
    success = (memcmp(r, expected_prefix, sizeof expected_prefix) == 0);

    if (success) {
        printf("Success!\n");
    } else {
        printf("Failure!\n");
    }

    return success ? 0 : 1;
}
| 97 | + |
0 commit comments