Skip to content

Commit 85d5c0a

Browse files
arielb1Ariel Ben-Yehuda
and
Ariel Ben-Yehuda
authored
hand-assemble instructions not supported in old binutils (#2471)
* hand-assemble instructions not present in old binutils * add script for regenerating asmMap --------- Co-authored-by: Ariel Ben-Yehuda <[email protected]>
1 parent e98f47f commit 85d5c0a

File tree

2 files changed

+110
-1
lines changed

2 files changed

+110
-1
lines changed

crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl

+68-1
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,73 @@ sub _aes_gcm_update {
966966
$code .= _aes_gcm_update 0;
967967
$code .= _end_func;
968968

969-
print $code;
969+
sub filter_and_print {
    # Prints the generated assembly held in the file-level $code, one line at
    # a time, replacing each AVX2 VAES / VPCLMULQDQ instruction listed in
    # %asmMap with its pre-assembled ".byte" form.  This lets the output be
    # assembled by old versions of binutils (older than 2.30) that do not
    # support these instructions.
    #
    # Takes no arguments.  Lines that are not in the map are printed
    # unchanged.  Dies when a line still contains a vaes*/vpclmulqdq
    # instruction with %ymm or %zmm operands that has no entry in %asmMap,
    # because such a line would not assemble under old binutils.
    my %asmMap = (
        'vaesenc %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdc,0xe2',
        'vaesenc %ymm2, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdc,0xea',
        'vaesenc %ymm2, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdc,0xf2',
        'vaesenc %ymm2, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdc,0xfa',
        'vaesenclast %ymm10, %ymm12, %ymm12' => '.byte 0xc4,0x42,0x1d,0xdd,0xe2',
        'vaesenclast %ymm10, %ymm13, %ymm13' => '.byte 0xc4,0x42,0x15,0xdd,0xea',
        'vaesenclast %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdd,0xe2',
        'vaesenclast %ymm3, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdd,0xeb',
        'vaesenclast %ymm5, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdd,0xf5',
        'vaesenclast %ymm6, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdd,0xfe',
        'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x00',
        'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm5' => '.byte 0xc4,0xe3,0x1d,0x44,0xea,0x00',
        'vpclmulqdq $0x00, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x00',
        'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x00',
        'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm5' => '.byte 0xc4,0xe3,0x65,0x44,0xec,0x00',
        'vpclmulqdq $0x00, %ymm5, %ymm3, %ymm0' => '.byte 0xc4,0xe3,0x65,0x44,0xc5,0x00',
        'vpclmulqdq $0x00, %ymm5, %ymm4, %ymm0' => '.byte 0xc4,0xe3,0x5d,0x44,0xc5,0x00',
        'vpclmulqdq $0x00, %ymm7, %ymm2, %ymm6' => '.byte 0xc4,0xe3,0x6d,0x44,0xf7,0x00',
        'vpclmulqdq $0x00, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x00',
        'vpclmulqdq $0x01, %ymm0, %ymm6, %ymm2' => '.byte 0xc4,0xe3,0x4d,0x44,0xd0,0x01',
        'vpclmulqdq $0x01, %ymm1, %ymm6, %ymm0' => '.byte 0xc4,0xe3,0x4d,0x44,0xc1,0x01',
        'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x01',
        'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm6' => '.byte 0xc4,0xe3,0x1d,0x44,0xf2,0x01',
        'vpclmulqdq $0x01, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x01',
        'vpclmulqdq $0x01, %ymm5, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xdd,0x01',
        'vpclmulqdq $0x01, %ymm5, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcd,0x01',
        'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm1' => '.byte 0xc4,0xe3,0x5d,0x44,0xcd,0x01',
        'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x01',
        'vpclmulqdq $0x01, %ymm6, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xde,0x01',
        'vpclmulqdq $0x01, %ymm6, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd6,0x01',
        'vpclmulqdq $0x10, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x10',
        'vpclmulqdq $0x10, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x10',
        'vpclmulqdq $0x10, %ymm5, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd5,0x10',
        'vpclmulqdq $0x10, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x10',
        'vpclmulqdq $0x10, %ymm7, %ymm2, %ymm2' => '.byte 0xc4,0xe3,0x6d,0x44,0xd7,0x10',
        'vpclmulqdq $0x10, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x10',
        'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x11',
        'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm7' => '.byte 0xc4,0xe3,0x1d,0x44,0xfa,0x11',
        'vpclmulqdq $0x11, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x11',
        'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcc,0x11',
        'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x11',
        'vpclmulqdq $0x11, %ymm5, %ymm3, %ymm4' => '.byte 0xc4,0xe3,0x65,0x44,0xe5,0x11',
        'vpclmulqdq $0x11, %ymm5, %ymm4, %ymm3' => '.byte 0xc4,0xe3,0x5d,0x44,0xdd,0x11',
    );

    # Whitespace between the mnemonic and its operands carries no meaning, so
    # both the map keys and the source lines are compared in a canonical form
    # (single spaces, ", " between operands).  This keeps the lookup working
    # whether a key was written with one space or with padded columns.
    my %encoding_for;
    for my $insn (keys %asmMap) {
        (my $canonical_key = $insn) =~ s/\s+/ /g;
        $canonical_key =~ s/\s*,\s*/, /g;
        $encoding_for{$canonical_key} = $asmMap{$insn};
    }

    for my $line (split("\n", $code)) {
        my $trimmed = $line;
        $trimmed =~ s/^\s+//;
        # Drop trailing whitespace and any trailing comment.  \s* (not \s+)
        # also removes a comment that starts the line, so a comment-only line
        # that mentions an instruction is not mistaken for that instruction.
        $trimmed =~ s/\s*(?:#.*)?$//;

        (my $canonical = $trimmed) =~ s/\s+/ /g;
        $canonical =~ s/\s*,\s*/, /g;

        if (exists $encoding_for{$canonical}) {
            $line = $encoding_for{$canonical};
        }
        elsif ($trimmed =~ /(vpclmulqdq|vaes).*%[yz]mm/) {
            die("found instruction not supported under old binutils, please update asmMap with the results of running\n" .
                'find target -name "*aes-gcm-avx2*.o" -exec python3 crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py \{\} \; | LC_ALL=C sort | uniq' .
                "\n(unmatched instruction: $canonical)");
        }
        print $line, "\n";
    }
}
1034+
1035+
# Emit the generated code through filter_and_print, which prints $code with
# the instructions listed in its map replaced by ".byte" sequences.
filter_and_print();
1036+
9701037
close STDOUT or die "error closing STDOUT: $!";
9711038
exit 0;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
8+
# Matches one line of `objdump -d` output (as invoked in main) that holds a
# vpclmul<type>dq instruction with at least one %ymm operand. Named groups:
# `disas` (the space-separated hex bytes), `type` (the text between "vpclmul"
# and "dq", looked up in TYPE_MAP), and `args` (the operand text).
PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+vpclmul(?P<type>[0-9a-z]+)dq (?P<args>.*%ymm.*)$')
9+
# Matches one line of `objdump -d` output (as invoked in main) that holds a
# vaesenc or vaesenclast instruction with at least one %ymm operand. Named
# groups: `disas` (the space-separated hex bytes), `instruction` (the
# mnemonic), and `args` (the operand text).
NON_PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+(?P<instruction>vaesenc|vaesenclast) (?P<args>.*%ymm.*)$')
10+
11+
# Maps the `type` group captured by PCLMUL_RE (the text between "vpclmul" and
# "dq" in the disassembled mnemonic) to the immediate value that main() prints
# as `$0x..` in the generated `vpclmulqdq` map key.
TYPE_MAP = {
12+
'lqlq': 0x00,
13+
'lqhq': 0x10,
14+
'hqlq': 0x01,
15+
'hqhq': 0x11,
16+
}
17+
18+
def hexify_disas(disas):
    """Turn space-separated hex bytes into a comma-separated `0x..` list.

    Surrounding whitespace is stripped first; every piece between single
    spaces then gets a `0x` prefix, e.g. 'c4 62 1d ' -> '0xc4,0x62,0x1d'.
    """
    # Split on an explicit ' ' (not on arbitrary whitespace) so that empty
    # pieces are kept and still receive their own '0x' prefix.
    pieces = disas.strip().split(' ')
    return ','.join('0x' + piece for piece in pieces)
20+
21+
def main():
    """Print Perl map entries for the instructions found in object files.

    Each object file named on the command line is disassembled with
    `objdump -d`. For every output line matching PCLMUL_RE or NON_PCLMUL_RE,
    one line of the form `'<instruction>' => '.byte <bytes>',` is written to
    stdout. objdump's stderr goes to this process's stderr, and a non-zero
    objdump exit status raises subprocess.CalledProcessError.
    """
    cli = argparse.ArgumentParser(
        prog='make-avx-map-for-old-binutils',
        description='Generate a map file for old binutils from .o files'
    )
    cli.add_argument('filename', nargs='+', help='object file to generate map from')
    options = cli.parse_args()

    for object_path in options.filename:
        raw_listing = subprocess.check_output(['objdump', '-d', object_path], stderr=sys.stderr)
        for row in raw_listing.decode('utf-8').split('\n'):
            pclmul = PCLMUL_RE.match(row)
            if pclmul is not None:
                encoded = hexify_disas(pclmul.group('disas'))
                # The disassembly spells the selector inside the mnemonic;
                # the map key spells it as an immediate operand.
                immediate = TYPE_MAP[pclmul.group('type')]
                operands = pclmul.group('args').replace(',', ', ')
                print(f" 'vpclmulqdq $0x{immediate:02x}, {operands}' => '.byte {encoded}',")
                continue

            aes = NON_PCLMUL_RE.match(row)
            if aes is None:
                continue
            encoded = hexify_disas(aes.group('disas'))
            operands = aes.group('args').replace(',', ', ')
            mnemonic = aes.group('instruction').ljust(16)
            print(f" '{mnemonic}{operands}' => '.byte {encoded}',")
39+
40+
41+
if __name__ == '__main__':
42+
main()

0 commit comments

Comments
 (0)