Skip to content

Commit 6ee8585

Browse files
author
Ariel Ben-Yehuda
committed
hand-assemble instructions that are not supported on old binutils
Fixes compilation on AL2 [binutils < 2.30]
1 parent 3542fbc commit 6ee8585

File tree

3 files changed

+121
-1
lines changed

3 files changed

+121
-1
lines changed

crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl

+6-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@
5757
or ( $xlate = "${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate )
5858
or die "can't locate x86_64-xlate.pl";
5959

60-
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
60+
# Locate the pre-filter that rewrites instructions unsupported by old binutils
# into hand-assembled .byte sequences.  It is searched for next to this script
# first, then in the shared perlasm directory, mirroring the x86_64-xlate.pl
# lookup above.
my $xlate_binutils;
( $xlate_binutils = "${dir}xlate-old-binutils.pl" and -f $xlate_binutils )
  or ( $xlate_binutils = "${dir}../../../perlasm/xlate-old-binutils.pl" and -f $xlate_binutils )
  or die "can't locate xlate-old-binutils.pl";

# Everything printed to OUT goes through the old-binutils filter and then
# through x86_64-xlate.pl.  Every path is double-quoted for the shell so that a
# checkout directory containing spaces does not split the command line, and a
# failure to start the pipeline is reported instead of silently ignored.
open OUT, "| \"$^X\" \"$xlate_binutils\" | \"$^X\" \"$xlate\" $flavour \"$output\""
  or die "can't open pipe to xlate-old-binutils.pl / x86_64-xlate.pl: $!";
6166
*STDOUT = *OUT;
6267

6368
my $g_cur_func_name;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
8+
# Common start of an `objdump -d` instruction line as matched by both patterns:
# leading whitespace, a hex address followed by ':', then the instruction bytes
# as space-terminated two-digit hex pairs (captured as `disas`).
_DISAS_PREFIX = r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+'

# vpclmul<type>dq with at least one %ymm operand; `type` is the mnemonic infix
# that TYPE_MAP turns into an immediate.
PCLMUL_RE = re.compile(
    _DISAS_PREFIX + r'vpclmul(?P<type>[0-9a-z]+)dq (?P<args>.*%ymm.*)$'
)

# vaesenc / vaesenclast with at least one %ymm operand.
NON_PCLMUL_RE = re.compile(
    _DISAS_PREFIX + r'(?P<instruction>vaesenc|vaesenclast) (?P<args>.*%ymm.*)$'
)

# Mnemonic infix captured by PCLMUL_RE -> immediate value printed in the
# generated `vpclmulqdq $0x..` map key.
TYPE_MAP = dict(lqlq=0x00, lqhq=0x10, hqlq=0x01, hqhq=0x11)
17+
18+
def hexify_disas(disas):
    """Turn space-separated hex byte text into a comma-separated 0x list.

    Surrounding whitespace is stripped, the remainder is split on single
    spaces, and every piece is prefixed with '0x', e.g.
    'c4 62 1d ' -> '0xc4,0x62,0x1d'.
    """
    pieces = disas.strip().split(' ')
    return ','.join('0x' + piece for piece in pieces)
20+
21+
def main():
    """Print Perl map entries for the matching instructions in each object file.

    Every file named on the command line is disassembled with `objdump -d`.
    Each output line matching PCLMUL_RE or NON_PCLMUL_RE is printed as
    `'<instruction text>' => '.byte <encoding>',`. objdump's stderr is passed
    through, and a non-zero objdump exit raises CalledProcessError.
    """
    cli = argparse.ArgumentParser(
        prog='make-avx-map-for-old-binutils',
        description='Generate a map file for old binutils from .o files',
    )
    cli.add_argument('filename', nargs='+', help='object file to generate map from')
    object_files = cli.parse_args().filename

    for object_file in object_files:
        listing = subprocess.check_output(['objdump', '-d', object_file], stderr=sys.stderr)
        for text in listing.decode('utf-8').split('\n'):
            pclmul = PCLMUL_RE.match(text)
            if pclmul:
                encoded = hexify_disas(pclmul.group('disas'))
                # An infix missing from TYPE_MAP raises KeyError here.
                imm = TYPE_MAP[pclmul.group('type')]
                operands = pclmul.group('args').replace(',', ', ')
                print(f" 'vpclmulqdq $0x{imm:02x}, {operands}' => '.byte {encoded}',")
                continue

            other = NON_PCLMUL_RE.match(text)
            if other:
                encoded = hexify_disas(other.group('disas'))
                operands = other.group('args').replace(',', ', ')
                # The mnemonic is padded to 16 columns before the operands.
                mnemonic = other.group('instruction').ljust(16)
                print(f" '{mnemonic}{operands}' => '.byte {encoded}',")
39+
40+
41+
if __name__ == '__main__':
42+
main()

crypto/perlasm/xlate-old-binutils.pl

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env perl
2+
3+
# Assembled forms of the AVX2 instructions that old versions of binutils
# (older than 2.30) do not support.  Built once when the script starts rather
# than on every call to filter().
my %asmMap = (
    'vaesenc %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdc,0xe2',
    'vaesenc %ymm2, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdc,0xea',
    'vaesenc %ymm2, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdc,0xf2',
    'vaesenc %ymm2, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdc,0xfa',
    'vaesenclast %ymm10, %ymm12, %ymm12' => '.byte 0xc4,0x42,0x1d,0xdd,0xe2',
    'vaesenclast %ymm10, %ymm13, %ymm13' => '.byte 0xc4,0x42,0x15,0xdd,0xea',
    'vaesenclast %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdd,0xe2',
    'vaesenclast %ymm3, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdd,0xeb',
    'vaesenclast %ymm5, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdd,0xf5',
    'vaesenclast %ymm6, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdd,0xfe',
    'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x00',
    'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm5' => '.byte 0xc4,0xe3,0x1d,0x44,0xea,0x00',
    'vpclmulqdq $0x00, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x00',
    'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x00',
    'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm5' => '.byte 0xc4,0xe3,0x65,0x44,0xec,0x00',
    'vpclmulqdq $0x00, %ymm5, %ymm3, %ymm0' => '.byte 0xc4,0xe3,0x65,0x44,0xc5,0x00',
    'vpclmulqdq $0x00, %ymm5, %ymm4, %ymm0' => '.byte 0xc4,0xe3,0x5d,0x44,0xc5,0x00',
    'vpclmulqdq $0x00, %ymm7, %ymm2, %ymm6' => '.byte 0xc4,0xe3,0x6d,0x44,0xf7,0x00',
    'vpclmulqdq $0x00, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x00',
    'vpclmulqdq $0x01, %ymm0, %ymm6, %ymm2' => '.byte 0xc4,0xe3,0x4d,0x44,0xd0,0x01',
    'vpclmulqdq $0x01, %ymm1, %ymm6, %ymm0' => '.byte 0xc4,0xe3,0x4d,0x44,0xc1,0x01',
    'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x01',
    'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm6' => '.byte 0xc4,0xe3,0x1d,0x44,0xf2,0x01',
    'vpclmulqdq $0x01, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x01',
    'vpclmulqdq $0x01, %ymm5, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xdd,0x01',
    'vpclmulqdq $0x01, %ymm5, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcd,0x01',
    'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm1' => '.byte 0xc4,0xe3,0x5d,0x44,0xcd,0x01',
    'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x01',
    'vpclmulqdq $0x01, %ymm6, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xde,0x01',
    'vpclmulqdq $0x01, %ymm6, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd6,0x01',
    'vpclmulqdq $0x10, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x10',
    'vpclmulqdq $0x10, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x10',
    'vpclmulqdq $0x10, %ymm5, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd5,0x10',
    'vpclmulqdq $0x10, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x10',
    'vpclmulqdq $0x10, %ymm7, %ymm2, %ymm2' => '.byte 0xc4,0xe3,0x6d,0x44,0xd7,0x10',
    'vpclmulqdq $0x10, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x10',
    'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x11',
    'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm7' => '.byte 0xc4,0xe3,0x1d,0x44,0xfa,0x11',
    'vpclmulqdq $0x11, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x11',
    'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcc,0x11',
    'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x11',
    'vpclmulqdq $0x11, %ymm5, %ymm3, %ymm4' => '.byte 0xc4,0xe3,0x65,0x44,0xe5,0x11',
    'vpclmulqdq $0x11, %ymm5, %ymm4, %ymm3' => '.byte 0xc4,0xe3,0x5d,0x44,0xdd,0x11',
);

# Lookup table keyed by the canonical spelling of each %asmMap key, so entries
# match regardless of how the mnemonic is padded or the operands are spaced.
my %canonicalAsmMap =
    map { ( _canonical_instruction($_), $asmMap{$_} ) } keys %asmMap;

# Reduce one line of assembly text to a canonical spelling used only for
# comparison: '#' comment removed, surrounding whitespace removed, whitespace
# runs collapsed to one space, and operands separated by exactly ", ".
sub _canonical_instruction {
    my ($text) = @_;
    $text =~ s/#.*$//;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\s+/ /g;
    $text =~ s/ ?, ?/, /g;
    return $text;
}

# filter($line)
#
# Replaces an AVX2 instruction listed in %asmMap with its assembled '.byte'
# form, to allow the code to work on old versions of binutils (older than
# 2.30) that do not support these instructions.
#
# Takes one line of input (a trailing newline is removed).  Returns the '.byte'
# replacement when the instruction is in the map, otherwise the chomped line
# unchanged.  Dies when the line contains a vpclmulqdq/vaes* instruction on
# %ymm/%zmm registers that has no map entry, since passing it through would
# fail later in the old assembler.
sub filter {
    my ($line) = @_;
    chomp($line);

    # The canonical copy is used for lookup and checking only; a line that is
    # not replaced is returned exactly as it came in.
    my $trimmed = _canonical_instruction($line);

    if (exists $canonicalAsmMap{$trimmed}) {
        $line = $canonicalAsmMap{$trimmed};
    } else {
        # Comments were stripped above, so a remark that merely mentions one of
        # these instructions does not abort the build.
        if ($trimmed =~ /(vpclmulqdq|vaes).*%[yz]mm/) {
            die ("found instruction not supported under old binutils, please update asmMap with the results of running\n" .
                'find target -name "*aes-gcm-avx*.o" -exec python3 crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py \{\} \; | LC_ALL=C sort | uniq' .
                "\noffending instruction: $trimmed");
        }
    }
    return $line;
}
69+
70+
# Copy STDIN to STDOUT one line at a time, passing each line through filter().
# filter() chomps its argument, so the newline is written back here.
while (defined(my $asm_line = <STDIN>)) {
    my $rewritten = filter($asm_line);
    print $rewritten, "\n";
}

0 commit comments

Comments
 (0)