|
#include "sha256.h"

// The exported symbol is poh_verify_many_simd_<NAME_SUFFIX>. Two macro levels
// are used so that NAME_SUFFIX is expanded before it is token-pasted.
#define MAKE_FN_NAME(x) export void poh_verify_many_simd_ ## x (uniform u8 hashes[], uniform const unsigned int64 num_hashes_arr[])
#define FUNCTION_NAME(signal) MAKE_FN_NAME(signal)

/*
 * Repeatedly applies SHA-256 to a batch of hashes, in place, one hash per
 * program instance (programCount hashes per call).
 *
 * hashes         - programCount consecutive records of SHA256_BLOCK_SIZE bytes.
 *                  Record i is read as big-endian 32-bit words and, when its
 *                  iteration count is non-zero, overwritten with the result.
 * num_hashes_arr - programCount iteration counts; num_hashes_arr[i] is how
 *                  many times record i is hashed. A count of 0 leaves the
 *                  record untouched.
 *
 * Each iteration hashes the previous 8-word digest as a single padded block:
 * the fixed padding words below encode a message length of 0x100 (256) bits.
 * NOTE(review): this assumes SHA256_BLOCK_SIZE is 32 - confirm in sha256.h.
 */
FUNCTION_NAME(NAME_SUFFIX)
{
    foreach (i = 0 ... programCount) {
        u8* hash = &hashes[i * SHA256_BLOCK_SIZE];
        const unsigned int64 num_hashes = num_hashes_arr[i];

        varying u32 s[8];   // working state / digest words
        varying u32 w[64];  // message schedule
        // T0/T1 are not referenced directly here; presumably they are the
        // scratch variables used by SHA256ROUND (defined in sha256.h).
        varying u32 T0;
        varying u32 T1;

        // Load the current hash as big-endian message words w[0..7].
        for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) {
            w[j] = (((u32)hash[j * 4] << 24) |
                    ((u32)hash[j * 4 + 1] << 16) |
                    ((u32)hash[j * 4 + 2] << 8) |
                    ((u32)hash[j * 4 + 3]));
        }

        if (num_hashes > 0) {
            // Padding for a 256-bit message: a single 1 bit, zeros, then the
            // bit length. Nothing below writes w[8..15], so set them once.
            w[8] = 0x80000000;
            w[9] = 0;
            w[10] = 0;
            w[11] = 0;
            w[12] = 0;
            w[13] = 0;
            w[14] = 0;
            w[15] = 0x00000100;

            // The counter has the same 64-bit unsigned type as the bound; a
            // 32-bit signed counter would overflow for counts >= 2^31.
            for (unsigned int64 n = 0; n < num_hashes; n++) {
                // SHA-256 initial hash value.
                s[0] = 0x6a09e667;
                s[1] = 0xbb67ae85;
                s[2] = 0x3c6ef372;
                s[3] = 0xa54ff53a;
                s[4] = 0x510e527f;
                s[5] = 0x9b05688c;
                s[6] = 0x1f83d9ab;
                s[7] = 0x5be0cd19;

                // Rounds 0-15 consume the message words directly.
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]);

                // Rounds 16-63: extend the schedule one word at a time, then
                // run the round that consumes it.
                w[16] = WSIGMA1(w[14]) + w[0] + w[9] + WSIGMA0(w[1]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]);
                w[17] = WSIGMA1(w[15]) + w[1] + w[10] + WSIGMA0(w[2]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]);
                w[18] = WSIGMA1(w[16]) + w[2] + w[11] + WSIGMA0(w[3]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]);
                w[19] = WSIGMA1(w[17]) + w[3] + w[12] + WSIGMA0(w[4]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]);
                w[20] = WSIGMA1(w[18]) + w[4] + w[13] + WSIGMA0(w[5]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]);
                w[21] = WSIGMA1(w[19]) + w[5] + w[14] + WSIGMA0(w[6]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]);
                w[22] = WSIGMA1(w[20]) + w[6] + w[15] + WSIGMA0(w[7]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]);
                w[23] = WSIGMA1(w[21]) + w[7] + w[16] + WSIGMA0(w[8]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]);
                w[24] = WSIGMA1(w[22]) + w[8] + w[17] + WSIGMA0(w[9]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]);
                w[25] = WSIGMA1(w[23]) + w[9] + w[18] + WSIGMA0(w[10]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]);
                w[26] = WSIGMA1(w[24]) + w[10] + w[19] + WSIGMA0(w[11]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]);
                w[27] = WSIGMA1(w[25]) + w[11] + w[20] + WSIGMA0(w[12]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]);
                w[28] = WSIGMA1(w[26]) + w[12] + w[21] + WSIGMA0(w[13]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]);
                w[29] = WSIGMA1(w[27]) + w[13] + w[22] + WSIGMA0(w[14]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]);
                w[30] = WSIGMA1(w[28]) + w[14] + w[23] + WSIGMA0(w[15]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]);
                w[31] = WSIGMA1(w[29]) + w[15] + w[24] + WSIGMA0(w[16]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]);
                w[32] = WSIGMA1(w[30]) + w[16] + w[25] + WSIGMA0(w[17]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]);
                w[33] = WSIGMA1(w[31]) + w[17] + w[26] + WSIGMA0(w[18]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]);
                w[34] = WSIGMA1(w[32]) + w[18] + w[27] + WSIGMA0(w[19]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]);
                w[35] = WSIGMA1(w[33]) + w[19] + w[28] + WSIGMA0(w[20]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]);
                w[36] = WSIGMA1(w[34]) + w[20] + w[29] + WSIGMA0(w[21]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]);
                w[37] = WSIGMA1(w[35]) + w[21] + w[30] + WSIGMA0(w[22]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]);
                w[38] = WSIGMA1(w[36]) + w[22] + w[31] + WSIGMA0(w[23]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]);
                w[39] = WSIGMA1(w[37]) + w[23] + w[32] + WSIGMA0(w[24]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]);
                w[40] = WSIGMA1(w[38]) + w[24] + w[33] + WSIGMA0(w[25]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]);
                w[41] = WSIGMA1(w[39]) + w[25] + w[34] + WSIGMA0(w[26]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]);
                w[42] = WSIGMA1(w[40]) + w[26] + w[35] + WSIGMA0(w[27]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]);
                w[43] = WSIGMA1(w[41]) + w[27] + w[36] + WSIGMA0(w[28]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]);
                w[44] = WSIGMA1(w[42]) + w[28] + w[37] + WSIGMA0(w[29]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]);
                w[45] = WSIGMA1(w[43]) + w[29] + w[38] + WSIGMA0(w[30]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]);
                w[46] = WSIGMA1(w[44]) + w[30] + w[39] + WSIGMA0(w[31]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]);
                w[47] = WSIGMA1(w[45]) + w[31] + w[40] + WSIGMA0(w[32]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]);
                w[48] = WSIGMA1(w[46]) + w[32] + w[41] + WSIGMA0(w[33]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]);
                w[49] = WSIGMA1(w[47]) + w[33] + w[42] + WSIGMA0(w[34]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]);
                w[50] = WSIGMA1(w[48]) + w[34] + w[43] + WSIGMA0(w[35]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]);
                w[51] = WSIGMA1(w[49]) + w[35] + w[44] + WSIGMA0(w[36]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]);
                w[52] = WSIGMA1(w[50]) + w[36] + w[45] + WSIGMA0(w[37]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]);
                w[53] = WSIGMA1(w[51]) + w[37] + w[46] + WSIGMA0(w[38]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]);
                w[54] = WSIGMA1(w[52]) + w[38] + w[47] + WSIGMA0(w[39]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]);
                w[55] = WSIGMA1(w[53]) + w[39] + w[48] + WSIGMA0(w[40]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]);
                w[56] = WSIGMA1(w[54]) + w[40] + w[49] + WSIGMA0(w[41]);
                SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]);
                w[57] = WSIGMA1(w[55]) + w[41] + w[50] + WSIGMA0(w[42]);
                SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]);
                w[58] = WSIGMA1(w[56]) + w[42] + w[51] + WSIGMA0(w[43]);
                SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]);
                w[59] = WSIGMA1(w[57]) + w[43] + w[52] + WSIGMA0(w[44]);
                SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]);
                w[60] = WSIGMA1(w[58]) + w[44] + w[53] + WSIGMA0(w[45]);
                SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]);
                w[61] = WSIGMA1(w[59]) + w[45] + w[54] + WSIGMA0(w[46]);
                SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]);
                w[62] = WSIGMA1(w[60]) + w[46] + w[55] + WSIGMA0(w[47]);
                SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]);
                w[63] = WSIGMA1(w[61]) + w[47] + w[56] + WSIGMA0(w[48]);
                SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]);

                // Feed forward: add the initial hash value back in.
                s[0] = s[0] + 0x6a09e667;
                s[1] = s[1] + 0xbb67ae85;
                s[2] = s[2] + 0x3c6ef372;
                s[3] = s[3] + 0xa54ff53a;
                s[4] = s[4] + 0x510e527f;
                s[5] = s[5] + 0x9b05688c;
                s[6] = s[6] + 0x1f83d9ab;
                s[7] = s[7] + 0x5be0cd19;

                // Chain: this digest is the message for the next iteration.
                for (int k = 0; k < 8; k++) {
                    w[k] = s[k];
                }
            }

            // Write the final digest back over the input as big-endian bytes.
            // This stays inside the count check because s is only assigned
            // when at least one iteration has run.
            for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) {
                hash[j * 4 + 3] = s[j] & 0xff;
                hash[j * 4 + 2] = (s[j] >> 8) & 0xff;
                hash[j * 4 + 1] = (s[j] >> 16) & 0xff;
                hash[j * 4 + 0] = (s[j] >> 24) & 0xff;
            }
        }
    }
}
0 commit comments