Skip to content

Commit 52bc44c

Browse files
committed
📝 Update main
1 parent 654e940 commit 52bc44c

File tree

1 file changed

+54
-2
lines changed

1 file changed

+54
-2
lines changed

src/gf/gf128_mul.c

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,58 @@
66

77
/*
** gf128_mul - multiply two elements of GF(2^128) with carry-less multiplies.
**
** Each operand is read as a polynomial over GF(2) in natural bit order:
** bit i of the 128-bit value is the coefficient of x^i. The 256-bit
** carry-less product is reduced modulo x^128 + x^7 + x^2 + x + 1.
** NOTE(review): this is not the bit-reflected convention GHASH uses;
** confirm which representation the callers expect.
**
** a, b:    the two field elements.
** returns: the reduced 128-bit product a * b.
**
** Requires PCLMULQDQ and SSE2 support at compile time and at run time.
*/
__m128i gf128_mul(__m128i a, __m128i b)
{
	const __m128i	lane0_mask = _mm_setr_epi32(-1, 0, 0, 0);
	__m128i			lo;
	__m128i			hi;
	__m128i			mid;
	__m128i			a_fold;
	__m128i			b_fold;
	__m128i			carry;
	__m128i			wrap;

	/* Karatsuba: three 64x64 carry-less products instead of four. */
	lo = _mm_clmulepi64_si128(a, b, 0x00);	/* a_lo * b_lo */
	hi = _mm_clmulepi64_si128(a, b, 0x11);	/* a_hi * b_hi */
	/* 0x4e swaps the two 64-bit halves, so the low half becomes lo ^ hi. */
	a_fold = _mm_xor_si128(_mm_shuffle_epi32(a, 0x4e), a);
	b_fold = _mm_xor_si128(_mm_shuffle_epi32(b, 0x4e), b);
	mid = _mm_clmulepi64_si128(a_fold, b_fold, 0x00);
	/* (a_lo ^ a_hi)(b_lo ^ b_hi) ^ lo ^ hi leaves only the cross terms. */
	mid = _mm_xor_si128(mid, _mm_xor_si128(lo, hi));

	/* The cross terms sit at x^64: split them across the two halves. */
	lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
	hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));

	/*
	** Reduction: x^128 == x^7 + x^2 + x + 1, so the result is
	** lo ^ hi ^ (hi << 1) ^ (hi << 2) ^ (hi << 7), shifting bitwise across
	** the whole register. The left shifts below work per 32-bit lane, so
	** first collect the bits each lane will lose (its top 1, 2 and 7 bits).
	*/
	carry = _mm_xor_si128(_mm_srli_epi32(hi, 31), _mm_srli_epi32(hi, 30));
	carry = _mm_xor_si128(carry, _mm_srli_epi32(hi, 25));
	/* 0x93 rotates the lanes up by one: lane n -> n + 1, lane 3 -> lane 0. */
	carry = _mm_shuffle_epi32(carry, 0x93);
	/* Lane 3's carries spill past x^128 again; fold them back into hi. */
	wrap = _mm_and_si128(lane0_mask, carry);
	lo = _mm_xor_si128(lo, _mm_andnot_si128(lane0_mask, carry));
	hi = _mm_xor_si128(hi, wrap);

	/* All three are per-lane bit shifts; the cross-lane bits were handled above. */
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 1));
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 2));
	lo = _mm_xor_si128(lo, _mm_slli_epi32(hi, 7));
	return (_mm_xor_si128(lo, hi));
}

0 commit comments

Comments
 (0)