/*
 * Copyright 2007 Project RELIC
 *
 * This file is part of RELIC. RELIC is legal property of its developers,
 * whose names are not listed here. Please refer to the COPYRIGHT file.
 *
 * RELIC is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * RELIC is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with RELIC. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * @file
 *
 * Useful macros for binary field arithmetic.
 *
 * @version $Id$
 * @ingroup fb
 */
/*
 * Short aliases for the SSE intrinsics used by the multiplication and
 * reduction macros below. Each alias expands to a plain expression (no
 * trailing semicolon) so it can be used anywhere an expression is legal.
 */
#define PSHUFB(A, B)	_mm_shuffle_epi8((A), (B))
#define SHL64(A, B)		_mm_slli_epi64((A), (B))
#define SHR64(A, B)		_mm_srli_epi64((A), (B))
#define XOR(A, B)		_mm_xor_si128((A), (B))
#define SHL8(A, B)		_mm_slli_si128((A), (B))
#define SHR8(A, B)		_mm_srli_si128((A), (B))
#define AND(A, B)		_mm_and_si128((A), (B))
| 37 | + |
/**
 * Computes the 256-bit carry-less product of two 128-bit operands with one
 * Karatsuba level: three PCLMULQDQ invocations (lo*lo, hi*hi and the folded
 * middle term) instead of four.
 *
 * Inputs:   ma, mb - the two 128-bit operands.
 * Result:   t1:t0  - high:low 128-bit halves of the product.
 * Clobbers: t2, t3 - temporaries that must be declared by the caller.
 */
#define MUL(ma, mb)\
	t0 = _mm_clmulepi64_si128(ma, mb, 0x00);\
	t1 = _mm_clmulepi64_si128(ma, mb, 0x11);\
	t2 = XOR(SHR8(ma, 8), ma);\
	t3 = XOR(SHR8(mb, 8), mb);\
	t2 = _mm_clmulepi64_si128(t2, t3, 0x00);\
	t2 = XOR(t2, t0);\
	t2 = XOR(t2, t1);\
	t3 = SHR8(t2, 8);\
	t2 = SHL8(t2, 8);\
	t0 = XOR(t0, t2);\
	t1 = XOR(t1, t3);
/**
 * Computes the carry-less product of a 128-bit operand (ma) by a 64-bit
 * operand held in the low half of mb (double x single).
 *
 * Result:   t1:t0 - high:low 128-bit halves of the product.
 * Clobbers: t2    - temporary that must be declared by the caller.
 */
#define MULDXS(ma, mb)\
	t0 = _mm_clmulepi64_si128(ma, mb, 0x00);\
	t2 = _mm_clmulepi64_si128(ma, mb, 0x01);\
	t1 = SHR8(t2, 8);\
	t2 = SHL8(t2, 8);\
	t0 = XOR(t0, t2);
/**
 * Computes the carry-less product of a 64-bit operand (low half of ma) by a
 * 128-bit operand (mb). Carry-less multiplication is commutative, so this
 * simply swaps the operands and reuses MULDXS; result and clobbers are the
 * same as MULDXS.
 */
#define MULSXD(ma, mb) \
	MULDXS(mb, ma)
| 60 | + |
/*
 * Folds the 128-bit excess word t back into the accumulator words (m1:m0).
 * The shift pairs (right by 59, 57, 55, 52 and left by 5, 7, 9, 12 = 64
 * minus those) correspond to reduction offsets 0, 2, 4 and 7; with the
 * degree-251 bit sitting at position 59 of a 64-bit word (251 mod 64 = 59),
 * this looks like reduction modulo the pentanomial
 * z^251 + z^7 + z^4 + z^2 + 1 -- TODO(review): confirm against the field
 * definition used by the callers.
 * Clobbers: t0, t1 - temporaries that must be declared by the caller.
 */
#define RED251(t,m1,m0)\
	t0 = _mm_slli_si128(t,8); /* low qword of t, moved to the high half */\
	t1 = _mm_srli_si128(t,8); /* high qword of t, moved to the low half */\
	m1 = _mm_xor_si128(m1,_mm_srli_epi64(t1,59));\
	m1 = _mm_xor_si128(m1,_mm_srli_epi64(t1,57));\
	m1 = _mm_xor_si128(m1,_mm_srli_epi64(t1,55));\
	m1 = _mm_xor_si128(m1,_mm_srli_epi64(t1,52));\
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(t0,59));\
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(t0,57));\
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(t0,55));\
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(t0,52));\
	t0 = _mm_srli_si128(t0,8); /* restore low qword of t to the low half */\
	t1 = _mm_slli_si128(t1,8); /* restore high qword of t to the high half */\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t0,5));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t0,7));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t0,9));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t0,12));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t1,5));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t1,7));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t1,9));\
	m0 = _mm_xor_si128(m0,_mm_slli_epi64(t1,12));
| 82 | + |
/**
 * Performs the full modular reduction of a product spread over the words
 * (m3:m2:m1:m0): folds m3 and m2 down with RED251, then isolates the bits
 * at position >= 59 of m1's high qword (i.e. at/above degree 251 of the
 * result, since 251 mod 64 = 59) in m9 and folds them into m0 with the same
 * 59/57/55/52 shift offsets as RED251.
 *
 * NOTE(review): the bits isolated in m9 are folded into m0 but not cleared
 * from m1 here -- presumably the caller masks m1 afterwards; confirm.
 *
 * Clobbers: t0, t1 (via RED251) and m8, m9 - all declared by the caller.
 */
#define REDUCE() \
	RED251(m3,m2,m1); \
	RED251(m2,m1,m0); \
	m8 = _mm_srli_si128(m1,8); \
	m9 = _mm_srli_epi64(m8,59); \
	m9 = _mm_slli_epi64(m9,59); \
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(m9,59)); \
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(m9,57)); \
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(m9,55)); \
	m0 = _mm_xor_si128(m0,_mm_srli_epi64(m9,52));