Skip to content

Commit 90fe112

Browse files
author
Ariel Ben-Yehuda
committed
Add old-binutils support to the AVX10 code as well, and improve it
Currently there are no uses of ymm/zmm registers in the AVX10 code, but there will be. I tested that, once the map is filled out correctly, this code works for AVX-512 as well: if you use make-avx-map-for-old-binutils to generate the asmMap, the result has the same objdump output. I also made the Perl code less sensitive to whitespace differences; I think it is still pretty understandable. This commit currently does not change any output, not even the generated .S files.
1 parent 2723abb commit 90fe112

File tree

3 files changed

+100
-58
lines changed

3 files changed

+100
-58
lines changed

crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl

+35-1
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,40 @@ sub _aes_gcm_update {
13641364
$code .= _aes_gcm_update 0;
13651365
$code .= _end_func;
13661366

1367-
print $code;
1367+
1368+
sub filter_and_print {
1369+
# Prints $code to STDOUT line by line, replacing every instruction listed in %asmMap with its assembled form,
1370+
# to allow the code to work on old versions of binutils (older than 2.30) that do
1371+
# not support these instructions. Dies if a line mentioning vpclmulqdq/vaes with a %ymm/%zmm register has no %asmMap entry.
1372+
my %asmMap = (
1373+
# Keys are "<instruction> <args>" strings in the form built for $trimmed below. Currently empty, will contain content once we get AVX-512 support
1374+
);
1375+
for my $line (split("\n",$code)) {
# Only lines shaped like "<instruction> <args> [# comment]" are inspected; any other line is printed unchanged.
1376+
if ( $line =~ /^
1377+
(?<prespace>\s*)
1378+
(?<instruction>[a-z0-9]+)
1379+
\s+
1380+
(?<args>([^#]*[^ #])?)
1381+
(?<postspace>\s*([#].*)?)
1382+
$/x
1383+
) {
# Rebuild the instruction with exactly one space between mnemonic and operands, so the lookup ignores indentation and trailing comments.
1384+
my $trimmed = $+{instruction} . " " . $+{args};
1385+
my $prespace = $+{prespace};
1386+
my $postspace = $+{postspace};
1387+
if (exists $asmMap{$trimmed}) {
# Keep the original leading whitespace and any trailing whitespace/comment around the replacement.
1388+
$line = ${prespace} . $asmMap{$trimmed} . ${postspace};
1389+
} else {
# Fail loudly rather than emit an instruction that has no %asmMap entry and that old binutils would reject.
1390+
if($trimmed =~ /(vpclmulqdq|vaes).*%[yz]mm/) {
1391+
die ("found instruction not supported under old binutils, please update asmMap with the results of running\n" .
1392+
'find target -name "*aes-gcm-avx1*.o" -exec python3 crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py \{\} \; | LC_ALL=C sort | uniq');
1393+
}
1394+
}
1395+
}
1396+
print $line,"\n";
1397+
}
1398+
}
1399+
1400+
filter_and_print();
1401+
13681402
close STDOUT or die "error closing STDOUT: $!";
13691403
exit 0;

crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl

+61-53
Original file line numberDiff line numberDiff line change
@@ -971,61 +971,69 @@ sub filter_and_print {
971971
# to allow the code to work on old versions of binutils (older than 2.30) that do
972972
# not support these instructions.
973973
my %asmMap = (
974-
'vaesenc %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdc,0xe2',
975-
'vaesenc %ymm2, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdc,0xea',
976-
'vaesenc %ymm2, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdc,0xf2',
977-
'vaesenc %ymm2, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdc,0xfa',
978-
'vaesenclast %ymm10, %ymm12, %ymm12' => '.byte 0xc4,0x42,0x1d,0xdd,0xe2',
979-
'vaesenclast %ymm10, %ymm13, %ymm13' => '.byte 0xc4,0x42,0x15,0xdd,0xea',
980-
'vaesenclast %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdd,0xe2',
981-
'vaesenclast %ymm3, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdd,0xeb',
982-
'vaesenclast %ymm5, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdd,0xf5',
983-
'vaesenclast %ymm6, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdd,0xfe',
984-
'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x00',
985-
'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm5' => '.byte 0xc4,0xe3,0x1d,0x44,0xea,0x00',
986-
'vpclmulqdq $0x00, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x00',
987-
'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x00',
988-
'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm5' => '.byte 0xc4,0xe3,0x65,0x44,0xec,0x00',
989-
'vpclmulqdq $0x00, %ymm5, %ymm3, %ymm0' => '.byte 0xc4,0xe3,0x65,0x44,0xc5,0x00',
990-
'vpclmulqdq $0x00, %ymm5, %ymm4, %ymm0' => '.byte 0xc4,0xe3,0x5d,0x44,0xc5,0x00',
991-
'vpclmulqdq $0x00, %ymm7, %ymm2, %ymm6' => '.byte 0xc4,0xe3,0x6d,0x44,0xf7,0x00',
992-
'vpclmulqdq $0x00, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x00',
993-
'vpclmulqdq $0x01, %ymm0, %ymm6, %ymm2' => '.byte 0xc4,0xe3,0x4d,0x44,0xd0,0x01',
994-
'vpclmulqdq $0x01, %ymm1, %ymm6, %ymm0' => '.byte 0xc4,0xe3,0x4d,0x44,0xc1,0x01',
995-
'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x01',
996-
'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm6' => '.byte 0xc4,0xe3,0x1d,0x44,0xf2,0x01',
997-
'vpclmulqdq $0x01, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x01',
998-
'vpclmulqdq $0x01, %ymm5, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xdd,0x01',
999-
'vpclmulqdq $0x01, %ymm5, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcd,0x01',
1000-
'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm1' => '.byte 0xc4,0xe3,0x5d,0x44,0xcd,0x01',
1001-
'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x01',
1002-
'vpclmulqdq $0x01, %ymm6, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xde,0x01',
1003-
'vpclmulqdq $0x01, %ymm6, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd6,0x01',
1004-
'vpclmulqdq $0x10, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x10',
1005-
'vpclmulqdq $0x10, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x10',
1006-
'vpclmulqdq $0x10, %ymm5, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd5,0x10',
1007-
'vpclmulqdq $0x10, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x10',
1008-
'vpclmulqdq $0x10, %ymm7, %ymm2, %ymm2' => '.byte 0xc4,0xe3,0x6d,0x44,0xd7,0x10',
1009-
'vpclmulqdq $0x10, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x10',
1010-
'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x11',
1011-
'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm7' => '.byte 0xc4,0xe3,0x1d,0x44,0xfa,0x11',
1012-
'vpclmulqdq $0x11, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x11',
1013-
'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcc,0x11',
1014-
'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x11',
1015-
'vpclmulqdq $0x11, %ymm5, %ymm3, %ymm4' => '.byte 0xc4,0xe3,0x65,0x44,0xe5,0x11',
1016-
'vpclmulqdq $0x11, %ymm5, %ymm4, %ymm3' => '.byte 0xc4,0xe3,0x5d,0x44,0xdd,0x11',
974+
'vaesenc %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdc,0xe2',
975+
'vaesenc %ymm2, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdc,0xea',
976+
'vaesenc %ymm2, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdc,0xf2',
977+
'vaesenc %ymm2, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdc,0xfa',
978+
'vaesenclast %ymm10, %ymm12, %ymm12' => '.byte 0xc4,0x42,0x1d,0xdd,0xe2',
979+
'vaesenclast %ymm10, %ymm13, %ymm13' => '.byte 0xc4,0x42,0x15,0xdd,0xea',
980+
'vaesenclast %ymm2, %ymm12, %ymm12' => '.byte 0xc4,0x62,0x1d,0xdd,0xe2',
981+
'vaesenclast %ymm3, %ymm13, %ymm13' => '.byte 0xc4,0x62,0x15,0xdd,0xeb',
982+
'vaesenclast %ymm5, %ymm14, %ymm14' => '.byte 0xc4,0x62,0x0d,0xdd,0xf5',
983+
'vaesenclast %ymm6, %ymm15, %ymm15' => '.byte 0xc4,0x62,0x05,0xdd,0xfe',
984+
'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x00',
985+
'vpclmulqdq $0x00, %ymm2, %ymm12, %ymm5' => '.byte 0xc4,0xe3,0x1d,0x44,0xea,0x00',
986+
'vpclmulqdq $0x00, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x00',
987+
'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x00',
988+
'vpclmulqdq $0x00, %ymm4, %ymm3, %ymm5' => '.byte 0xc4,0xe3,0x65,0x44,0xec,0x00',
989+
'vpclmulqdq $0x00, %ymm5, %ymm3, %ymm0' => '.byte 0xc4,0xe3,0x65,0x44,0xc5,0x00',
990+
'vpclmulqdq $0x00, %ymm5, %ymm4, %ymm0' => '.byte 0xc4,0xe3,0x5d,0x44,0xc5,0x00',
991+
'vpclmulqdq $0x00, %ymm7, %ymm2, %ymm6' => '.byte 0xc4,0xe3,0x6d,0x44,0xf7,0x00',
992+
'vpclmulqdq $0x00, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x00',
993+
'vpclmulqdq $0x01, %ymm0, %ymm6, %ymm2' => '.byte 0xc4,0xe3,0x4d,0x44,0xd0,0x01',
994+
'vpclmulqdq $0x01, %ymm1, %ymm6, %ymm0' => '.byte 0xc4,0xe3,0x4d,0x44,0xc1,0x01',
995+
'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x01',
996+
'vpclmulqdq $0x01, %ymm2, %ymm12, %ymm6' => '.byte 0xc4,0xe3,0x1d,0x44,0xf2,0x01',
997+
'vpclmulqdq $0x01, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x01',
998+
'vpclmulqdq $0x01, %ymm5, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xdd,0x01',
999+
'vpclmulqdq $0x01, %ymm5, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcd,0x01',
1000+
'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm1' => '.byte 0xc4,0xe3,0x5d,0x44,0xcd,0x01',
1001+
'vpclmulqdq $0x01, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x01',
1002+
'vpclmulqdq $0x01, %ymm6, %ymm2, %ymm3' => '.byte 0xc4,0xe3,0x6d,0x44,0xde,0x01',
1003+
'vpclmulqdq $0x01, %ymm6, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd6,0x01',
1004+
'vpclmulqdq $0x10, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x10',
1005+
'vpclmulqdq $0x10, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x10',
1006+
'vpclmulqdq $0x10, %ymm5, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd5,0x10',
1007+
'vpclmulqdq $0x10, %ymm5, %ymm4, %ymm2' => '.byte 0xc4,0xe3,0x5d,0x44,0xd5,0x10',
1008+
'vpclmulqdq $0x10, %ymm7, %ymm2, %ymm2' => '.byte 0xc4,0xe3,0x6d,0x44,0xd7,0x10',
1009+
'vpclmulqdq $0x10, %ymm8, %ymm2, %ymm2' => '.byte 0xc4,0xc3,0x6d,0x44,0xd0,0x10',
1010+
'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm4' => '.byte 0xc4,0xe3,0x1d,0x44,0xe2,0x11',
1011+
'vpclmulqdq $0x11, %ymm2, %ymm12, %ymm7' => '.byte 0xc4,0xe3,0x1d,0x44,0xfa,0x11',
1012+
'vpclmulqdq $0x11, %ymm3, %ymm13, %ymm4' => '.byte 0xc4,0xe3,0x15,0x44,0xe3,0x11',
1013+
'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm1' => '.byte 0xc4,0xe3,0x65,0x44,0xcc,0x11',
1014+
'vpclmulqdq $0x11, %ymm4, %ymm3, %ymm2' => '.byte 0xc4,0xe3,0x65,0x44,0xd4,0x11',
1015+
'vpclmulqdq $0x11, %ymm5, %ymm3, %ymm4' => '.byte 0xc4,0xe3,0x65,0x44,0xe5,0x11',
1016+
'vpclmulqdq $0x11, %ymm5, %ymm4, %ymm3' => '.byte 0xc4,0xe3,0x5d,0x44,0xdd,0x11',
10171017
);
10181018
for my $line (split("\n",$code)) {
1019-
my $trimmed;
1020-
$trimmed = $line;
1021-
$trimmed =~ s/^\s+//;
1022-
$trimmed =~ s/\s+(#.*)?$//;
1023-
if (exists $asmMap{$trimmed}) {
1024-
$line = $asmMap{$trimmed};
1025-
} else {
1026-
if($trimmed =~ /(vpclmulqdq|vaes).*%[yz]mm/) {
1027-
die ("found instruction not supported under old binutils, please update asmMap with the results of running\n" .
1028-
'find target -name "*aes-gcm-avx2*.o" -exec python3 crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py \{\} \; | LC_ALL=C sort | uniq');
1019+
if ( $line =~ /^
1020+
(?<prespace>\s*)
1021+
(?<instruction>[a-z0-9]+)
1022+
\s+
1023+
(?<args>([^#]*[^ #])?)
1024+
(?<postspace>\s*([#].*)?)
1025+
$/x
1026+
) {
1027+
my $trimmed = $+{instruction} . " " . $+{args};
1028+
my $prespace = $+{prespace};
1029+
my $postspace = $+{postspace};
1030+
if (exists $asmMap{$trimmed}) {
1031+
$line = ${prespace} . $asmMap{$trimmed} . ${postspace};
1032+
} else {
1033+
if($trimmed =~ /(vpclmulqdq|vaes).*%[yz]mm/) {
1034+
die ("found instruction not supported under old binutils, please update asmMap with the results of running\n" .
1035+
'find target -name "*aes-gcm-avx1*.o" -exec python3 crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py \{\} \; | LC_ALL=C sort | uniq');
1036+
}
10291037
}
10301038
}
10311039
print $line,"\n";

crypto/fipsmodule/aes/asm/make-avx-map-for-old-binutils.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import re
66
import sys
77

8-
PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+vpclmul(?P<type>[0-9a-z]+)dq (?P<args>.*%ymm.*)$')
9-
NON_PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+(?P<instruction>vaesenc|vaesenclast) (?P<args>.*%ymm.*)$')
8+
PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+vpclmul(?P<type>[0-9a-z]+)dq (?P<args>.*%[yz]mm.*)$')
9+
NON_PCLMUL_RE = re.compile(r'^\s+[0-9a-f]+:\s+(?P<disas>(?:[0-9a-f][0-9a-f] )+)\s+(?P<instruction>vaesenc|vaesenclast) (?P<args>.*%[yz]mm.*)$')
1010

1111
TYPE_MAP = {
1212
'lqlq': 0x00,
@@ -31,11 +31,11 @@ def main():
3131
hexified_disas = hexify_disas(match.group('disas'))
3232
ty = TYPE_MAP[match.group('type')]
3333
args = match.group('args').replace(',', ', ')
34-
print(f" 'vpclmulqdq $0x{ty:02x}, {args}' => '.byte {hexified_disas}',")
34+
print(f" 'vpclmulqdq $0x{ty:02x}, {args}' => '.byte {hexified_disas}',")
3535
elif match := NON_PCLMUL_RE.match(line):
3636
hexified_disas = hexify_disas(match.group('disas'))
3737
args = match.group('args').replace(',', ', ')
38-
print(f" '{match.group('instruction').ljust(16)}{args}' => '.byte {hexified_disas}',")
38+
print(f" '{match.group('instruction')} {args}' => '.byte {hexified_disas}',")
3939

4040

4141
if __name__ == '__main__':

0 commit comments

Comments
 (0)