Skip to content

Commit ac4b8a8

Browse files
authored
Merge pull request #191 from pjbgf/arm64
arm64: Drop unused vregs
2 parents 99b08a1 + 0af2b4f commit ac4b8a8

File tree

1 file changed

+106
-107
lines changed

1 file changed

+106
-107
lines changed

sha1cdblock_arm64.s

Lines changed: 106 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -22,37 +22,37 @@
2222

2323
// FUNC3 f = (b & c) | (b & d) | (c & d)
// The body evaluates the equivalent form (b & c) | ((b | c) & d):
// the scratch register receives (b | c) & d, R15 receives b & c, and the
// two are ORed into R15, which holds the result on exit.
// The scratch register is R8 in the removed lines and R27 in the added lines.
// NOTE(review): Go's arm64 assembler guide lists R27 as reserved by the
// compiler and linker; confirm it is safe to use as scratch here.
2424
#define FUNC3(b, c, d) \
25-
MOVW b, R8; \
26-
ORR c, R8, R8; \
27-
ANDW d, R8, R8; \
25+
MOVW b, R27; \
26+
ORR c, R27, R27; \
27+
ANDW d, R27, R27; \
2828
MOVW b, R15; \
2929
ANDW c, R15, R15; \
30-
ORR R8, R15, R15
30+
ORR R27, R15, R15
3131

3232
// FUNC4 expands to FUNC2 with the same arguments.
#define FUNC4(b, c, d) FUNC2(b, c, d)
3333

34-
// MIX rotates b right by 2 in place, then accumulates into e:
//   e += R15 + k + R9 + (a rotated right by 27, i.e. left by 5 in 32 bits).
// R15 is read as left by the preceding FUNCn macro and R9 as left by
// LOAD/SHUFFLE; k is first moved into R19, so R19 is clobbered.
// The removed variant took a trailing vreg argument that the body never
// referenced and used R8 as scratch; the added variant drops vreg and uses R27.
// NOTE(review): Go's arm64 assembler guide lists R27 as reserved by the
// compiler and linker; confirm it is safe to use as scratch here.
#define MIX(a, b, c, d, e, k, vreg) \
34+
#define MIX(a, b, c, d, e, k) \
3535
RORW $2, b, b; \
3636
ADDW R15, e, e; \
37-
MOVW a, R8; \
38-
RORW $27, R8, R8; \
37+
MOVW a, R27; \
38+
RORW $27, R27, R27; \
3939
MOVW k, R19; \
4040
ADDW R19, e, e; \
4141
ADDW R9, e, e; \
42-
ADDW R8, e, e
42+
ADDW R27, e, e
4343

4444
// LOAD reads the 32-bit word at byte offset index*4 from the address in R16,
// byte-reverses it with REVW, and stores it at offset index*4 from RSP.
// The reversed word is left in R9.
// presumably R16 points at the current input block; verify against the
// function prologue, which is outside this view.
#define LOAD(index) \
4545
MOVWU (index*4)(R16), R9; \
4646
REVW R9, R9; \
4747
MOVW R9, (index*4)(RSP)
4848

4949
// LOADCS loads the cs_base+56(FP) argument into a scratch register and stores
// the five 32-bit values a, b, c, d, e at byte offsets index*20 + 0, 4, 8, 12
// and 16 from that pointer (20 bytes per index, i.e. five 32-bit words).
// The scratch register is R8 in the removed lines and R27 in the added lines.
// NOTE(review): Go's arm64 assembler guide lists R27 as reserved by the
// compiler and linker; confirm it is safe to use as scratch here.
#define LOADCS(a, b, c, d, e, index) \
50-
MOVD cs_base+56(FP), R8; \
51-
MOVW a, ((index*20))(R8); \
52-
MOVW b, ((index*20)+4)(R8); \
53-
MOVW c, ((index*20)+8)(R8); \
54-
MOVW d, ((index*20)+12)(R8); \
55-
MOVW e, ((index*20)+16)(R8)
50+
MOVD cs_base+56(FP), R27; \
51+
MOVW a, ((index*20))(R27); \
52+
MOVW b, ((index*20)+4)(R27); \
53+
MOVW c, ((index*20)+8)(R27); \
54+
MOVW d, ((index*20)+12)(R27); \
55+
MOVW e, ((index*20)+16)(R27)
5656

5757
#define SHUFFLE(index) \
5858
MOVW ((index&0xf)*4)(RSP), R9; \
@@ -67,38 +67,38 @@
6767

6868
// LOADM1 copies one message word into the m1 array: it loads the
// m1_base+32(FP) argument into a scratch register, reads the word at stack
// offset (index&0xf)*4 from RSP into R9, and stores R9 at byte offset index*4
// from the m1 pointer. R9 still holds that word on exit.
// The scratch register is R8 in the removed lines and R27 in the added lines.
// NOTE(review): Go's arm64 assembler guide lists R27 as reserved by the
// compiler and linker; confirm it is safe to use as scratch here.
6969
#define LOADM1(index) \
70-
MOVD m1_base+32(FP), R8; \
70+
MOVD m1_base+32(FP), R27; \
7171
MOVW ((index&0xf)*4)(RSP), R9; \
72-
MOVW R9, (index*4)(R8)
72+
MOVW R9, (index*4)(R27)
7373

74-
// ROUND1 performs one step using LOAD(index) to fetch the message word,
// FUNC1(b, c, d), MIX with RoundConst0, and finally LOADM1(index).
// The removed variant forwarded a trailing vreg argument to MIX; the added
// variant takes no vreg.
#define ROUND1(a, b, c, d, e, index, vreg) \
74+
#define ROUND1(a, b, c, d, e, index) \
7575
LOAD(index); \
7676
FUNC1(b, c, d); \
77-
MIX(a, b, c, d, e, RoundConst0, vreg); \
77+
MIX(a, b, c, d, e, RoundConst0); \
7878
LOADM1(index)
7979

80-
// ROUND1x is ROUND1 with SHUFFLE(index) in place of LOAD(index): it runs
// SHUFFLE, FUNC1(b, c, d), MIX with RoundConst0, and LOADM1(index).
// The removed variant forwarded a trailing vreg argument to MIX; the added
// variant takes no vreg.
#define ROUND1x(a, b, c, d, e, index, vreg) \
80+
#define ROUND1x(a, b, c, d, e, index) \
8181
SHUFFLE(index); \
8282
FUNC1(b, c, d); \
83-
MIX(a, b, c, d, e, RoundConst0, vreg); \
83+
MIX(a, b, c, d, e, RoundConst0); \
8484
LOADM1(index)
8585

86-
// ROUND2 performs one step: SHUFFLE(index), FUNC2(b, c, d), MIX with
// RoundConst1, then LOADM1(index).
// The removed variant forwarded a trailing vreg argument to MIX; the added
// variant takes no vreg.
#define ROUND2(a, b, c, d, e, index, vreg) \
86+
#define ROUND2(a, b, c, d, e, index) \
8787
SHUFFLE(index); \
8888
FUNC2(b, c, d); \
89-
MIX(a, b, c, d, e, RoundConst1, vreg); \
89+
MIX(a, b, c, d, e, RoundConst1); \
9090
LOADM1(index)
9191

92-
// ROUND3 performs one step: SHUFFLE(index), FUNC3(b, c, d), MIX with
// RoundConst2, then LOADM1(index).
// The removed variant forwarded a trailing vreg argument to MIX; the added
// variant takes no vreg.
#define ROUND3(a, b, c, d, e, index, vreg) \
92+
#define ROUND3(a, b, c, d, e, index) \
9393
SHUFFLE(index); \
9494
FUNC3(b, c, d); \
95-
MIX(a, b, c, d, e, RoundConst2, vreg); \
95+
MIX(a, b, c, d, e, RoundConst2); \
9696
LOADM1(index)
9797

98-
// ROUND4 performs one step: SHUFFLE(index), FUNC4(b, c, d), MIX with
// RoundConst3, then LOADM1(index).
// The removed variant forwarded a trailing vreg argument to MIX; the added
// variant takes no vreg.
#define ROUND4(a, b, c, d, e, index, vreg) \
98+
#define ROUND4(a, b, c, d, e, index) \
9999
SHUFFLE(index); \
100100
FUNC4(b, c, d); \
101-
MIX(a, b, c, d, e, RoundConst3, vreg); \
101+
MIX(a, b, c, d, e, RoundConst3); \
102102
LOADM1(index)
103103

104104
// func blockARM64(dig *digest, p []byte, m1 []uint32, cs [][5]uint32)
@@ -132,98 +132,98 @@ loop:
132132

133133
// ROUND1 (steps 0-15)
134134
LOADCS(R10, R11, R12, R13, R14, 0)
135-
ROUND1(R10, R11, R12, R13, R14, 0, V31)
136-
ROUND1(R14, R10, R11, R12, R13, 1, V30)
137-
ROUND1(R13, R14, R10, R11, R12, 2, V29)
138-
ROUND1(R12, R13, R14, R10, R11, 3, V28)
139-
ROUND1(R11, R12, R13, R14, R10, 4, V27)
140-
ROUND1(R10, R11, R12, R13, R14, 5, V26)
141-
ROUND1(R14, R10, R11, R12, R13, 6, V25)
142-
ROUND1(R13, R14, R10, R11, R12, 7, V24)
143-
ROUND1(R12, R13, R14, R10, R11, 8, V23)
144-
ROUND1(R11, R12, R13, R14, R10, 9, V22)
145-
ROUND1(R10, R11, R12, R13, R14, 10, V21)
146-
ROUND1(R14, R10, R11, R12, R13, 11, V20)
147-
ROUND1(R13, R14, R10, R11, R12, 12, V19)
148-
ROUND1(R12, R13, R14, R10, R11, 13, V18)
149-
ROUND1(R11, R12, R13, R14, R10, 14, V17)
150-
ROUND1(R10, R11, R12, R13, R14, 15, V16)
135+
ROUND1(R10, R11, R12, R13, R14, 0)
136+
ROUND1(R14, R10, R11, R12, R13, 1)
137+
ROUND1(R13, R14, R10, R11, R12, 2)
138+
ROUND1(R12, R13, R14, R10, R11, 3)
139+
ROUND1(R11, R12, R13, R14, R10, 4)
140+
ROUND1(R10, R11, R12, R13, R14, 5)
141+
ROUND1(R14, R10, R11, R12, R13, 6)
142+
ROUND1(R13, R14, R10, R11, R12, 7)
143+
ROUND1(R12, R13, R14, R10, R11, 8)
144+
ROUND1(R11, R12, R13, R14, R10, 9)
145+
ROUND1(R10, R11, R12, R13, R14, 10)
146+
ROUND1(R14, R10, R11, R12, R13, 11)
147+
ROUND1(R13, R14, R10, R11, R12, 12)
148+
ROUND1(R12, R13, R14, R10, R11, 13)
149+
ROUND1(R11, R12, R13, R14, R10, 14)
150+
ROUND1(R10, R11, R12, R13, R14, 15)
151151

152152
// ROUND1x (steps 16-19) - same as ROUND1, but uses SHUFFLE(index) instead of LOAD(index) to obtain the message word.
153-
ROUND1x(R14, R10, R11, R12, R13, 16, V15)
154-
ROUND1x(R13, R14, R10, R11, R12, 17, V14)
155-
ROUND1x(R12, R13, R14, R10, R11, 18, V13)
156-
ROUND1x(R11, R12, R13, R14, R10, 19, V12)
153+
ROUND1x(R14, R10, R11, R12, R13, 16)
154+
ROUND1x(R13, R14, R10, R11, R12, 17)
155+
ROUND1x(R12, R13, R14, R10, R11, 18)
156+
ROUND1x(R11, R12, R13, R14, R10, 19)
157157

158158
// ROUND2 (steps 20-39)
159-
ROUND2(R10, R11, R12, R13, R14, 20, V11)
160-
ROUND2(R14, R10, R11, R12, R13, 21, V10)
161-
ROUND2(R13, R14, R10, R11, R12, 22, V9)
162-
ROUND2(R12, R13, R14, R10, R11, 23, V8)
163-
ROUND2(R11, R12, R13, R14, R10, 24, V7)
164-
ROUND2(R10, R11, R12, R13, R14, 25, V6)
165-
ROUND2(R14, R10, R11, R12, R13, 26, V5)
166-
ROUND2(R13, R14, R10, R11, R12, 27, V4)
167-
ROUND2(R12, R13, R14, R10, R11, 28, V3)
168-
ROUND2(R11, R12, R13, R14, R10, 29, V2)
169-
ROUND2(R10, R11, R12, R13, R14, 30, V1)
170-
ROUND2(R14, R10, R11, R12, R13, 31, V0)
171-
ROUND2(R13, R14, R10, R11, R12, 32, V31)
172-
ROUND2(R12, R13, R14, R10, R11, 33, V30)
173-
ROUND2(R11, R12, R13, R14, R10, 34, V29)
174-
ROUND2(R10, R11, R12, R13, R14, 35, V28)
175-
ROUND2(R14, R10, R11, R12, R13, 36, V27)
176-
ROUND2(R13, R14, R10, R11, R12, 37, V26)
177-
ROUND2(R12, R13, R14, R10, R11, 38, V25)
178-
ROUND2(R11, R12, R13, R14, R10, 39, V24)
159+
ROUND2(R10, R11, R12, R13, R14, 20)
160+
ROUND2(R14, R10, R11, R12, R13, 21)
161+
ROUND2(R13, R14, R10, R11, R12, 22)
162+
ROUND2(R12, R13, R14, R10, R11, 23)
163+
ROUND2(R11, R12, R13, R14, R10, 24)
164+
ROUND2(R10, R11, R12, R13, R14, 25)
165+
ROUND2(R14, R10, R11, R12, R13, 26)
166+
ROUND2(R13, R14, R10, R11, R12, 27)
167+
ROUND2(R12, R13, R14, R10, R11, 28)
168+
ROUND2(R11, R12, R13, R14, R10, 29)
169+
ROUND2(R10, R11, R12, R13, R14, 30)
170+
ROUND2(R14, R10, R11, R12, R13, 31)
171+
ROUND2(R13, R14, R10, R11, R12, 32)
172+
ROUND2(R12, R13, R14, R10, R11, 33)
173+
ROUND2(R11, R12, R13, R14, R10, 34)
174+
ROUND2(R10, R11, R12, R13, R14, 35)
175+
ROUND2(R14, R10, R11, R12, R13, 36)
176+
ROUND2(R13, R14, R10, R11, R12, 37)
177+
ROUND2(R12, R13, R14, R10, R11, 38)
178+
ROUND2(R11, R12, R13, R14, R10, 39)
179179

180180
// ROUND3 (steps 40-59)
181-
ROUND3(R10, R11, R12, R13, R14, 40, V23)
182-
ROUND3(R14, R10, R11, R12, R13, 41, V22)
183-
ROUND3(R13, R14, R10, R11, R12, 42, V21)
184-
ROUND3(R12, R13, R14, R10, R11, 43, V20)
185-
ROUND3(R11, R12, R13, R14, R10, 44, V19)
186-
ROUND3(R10, R11, R12, R13, R14, 45, V18)
187-
ROUND3(R14, R10, R11, R12, R13, 46, V17)
188-
ROUND3(R13, R14, R10, R11, R12, 47, V16)
189-
ROUND3(R12, R13, R14, R10, R11, 48, V15)
190-
ROUND3(R11, R12, R13, R14, R10, 49, V14)
191-
ROUND3(R10, R11, R12, R13, R14, 50, V13)
192-
ROUND3(R14, R10, R11, R12, R13, 51, V12)
193-
ROUND3(R13, R14, R10, R11, R12, 52, V11)
194-
ROUND3(R12, R13, R14, R10, R11, 53, V10)
195-
ROUND3(R11, R12, R13, R14, R10, 54, V9)
196-
ROUND3(R10, R11, R12, R13, R14, 55, V8)
197-
ROUND3(R14, R10, R11, R12, R13, 56, V7)
198-
ROUND3(R13, R14, R10, R11, R12, 57, V6)
181+
ROUND3(R10, R11, R12, R13, R14, 40)
182+
ROUND3(R14, R10, R11, R12, R13, 41)
183+
ROUND3(R13, R14, R10, R11, R12, 42)
184+
ROUND3(R12, R13, R14, R10, R11, 43)
185+
ROUND3(R11, R12, R13, R14, R10, 44)
186+
ROUND3(R10, R11, R12, R13, R14, 45)
187+
ROUND3(R14, R10, R11, R12, R13, 46)
188+
ROUND3(R13, R14, R10, R11, R12, 47)
189+
ROUND3(R12, R13, R14, R10, R11, 48)
190+
ROUND3(R11, R12, R13, R14, R10, 49)
191+
ROUND3(R10, R11, R12, R13, R14, 50)
192+
ROUND3(R14, R10, R11, R12, R13, 51)
193+
ROUND3(R13, R14, R10, R11, R12, 52)
194+
ROUND3(R12, R13, R14, R10, R11, 53)
195+
ROUND3(R11, R12, R13, R14, R10, 54)
196+
ROUND3(R10, R11, R12, R13, R14, 55)
197+
ROUND3(R14, R10, R11, R12, R13, 56)
198+
ROUND3(R13, R14, R10, R11, R12, 57)
199199

200200
LOADCS(R12, R13, R14, R10, R11, 1)
201-
ROUND3(R12, R13, R14, R10, R11, 58, V5)
202-
ROUND3(R11, R12, R13, R14, R10, 59, V4)
201+
ROUND3(R12, R13, R14, R10, R11, 58)
202+
ROUND3(R11, R12, R13, R14, R10, 59)
203203

204204
// ROUND4 (steps 60-79)
205-
ROUND4(R10, R11, R12, R13, R14, 60, V3)
206-
ROUND4(R14, R10, R11, R12, R13, 61, V2)
207-
ROUND4(R13, R14, R10, R11, R12, 62, V1)
208-
ROUND4(R12, R13, R14, R10, R11, 63, V0)
209-
ROUND4(R11, R12, R13, R14, R10, 64, V31)
205+
ROUND4(R10, R11, R12, R13, R14, 60)
206+
ROUND4(R14, R10, R11, R12, R13, 61)
207+
ROUND4(R13, R14, R10, R11, R12, 62)
208+
ROUND4(R12, R13, R14, R10, R11, 63)
209+
ROUND4(R11, R12, R13, R14, R10, 64)
210210

211211
LOADCS(R10, R11, R12, R13, R14, 2)
212-
ROUND4(R10, R11, R12, R13, R14, 65, V30)
213-
ROUND4(R14, R10, R11, R12, R13, 66, V29)
214-
ROUND4(R13, R14, R10, R11, R12, 67, V28)
215-
ROUND4(R12, R13, R14, R10, R11, 68, V27)
216-
ROUND4(R11, R12, R13, R14, R10, 69, V26)
217-
ROUND4(R10, R11, R12, R13, R14, 70, V25)
218-
ROUND4(R14, R10, R11, R12, R13, 71, V24)
219-
ROUND4(R13, R14, R10, R11, R12, 72, V23)
220-
ROUND4(R12, R13, R14, R10, R11, 73, V22)
221-
ROUND4(R11, R12, R13, R14, R10, 74, V21)
222-
ROUND4(R10, R11, R12, R13, R14, 75, V20)
223-
ROUND4(R14, R10, R11, R12, R13, 76, V19)
224-
ROUND4(R13, R14, R10, R11, R12, 77, V18)
225-
ROUND4(R12, R13, R14, R10, R11, 78, V17)
226-
ROUND4(R11, R12, R13, R14, R10, 79, V16)
212+
ROUND4(R10, R11, R12, R13, R14, 65)
213+
ROUND4(R14, R10, R11, R12, R13, 66)
214+
ROUND4(R13, R14, R10, R11, R12, 67)
215+
ROUND4(R12, R13, R14, R10, R11, 68)
216+
ROUND4(R11, R12, R13, R14, R10, 69)
217+
ROUND4(R10, R11, R12, R13, R14, 70)
218+
ROUND4(R14, R10, R11, R12, R13, 71)
219+
ROUND4(R13, R14, R10, R11, R12, 72)
220+
ROUND4(R12, R13, R14, R10, R11, 73)
221+
ROUND4(R11, R12, R13, R14, R10, 74)
222+
ROUND4(R10, R11, R12, R13, R14, 75)
223+
ROUND4(R14, R10, R11, R12, R13, 76)
224+
ROUND4(R13, R14, R10, R11, R12, 77)
225+
ROUND4(R12, R13, R14, R10, R11, 78)
226+
ROUND4(R11, R12, R13, R14, R10, 79)
227227

228228
// Add registers to temp hash.
229229
ADDW R10, R1, R1
@@ -236,7 +236,6 @@ loop:
236236
B loop
237237

238238
end:
239-
MOVD dig+0(FP), R8
240239
MOVW R1, (R8)
241240
MOVW R2, 4(R8)
242241
MOVW R3, 8(R8)

0 commit comments

Comments
 (0)