Skip to content

Commit 72a0aa3

Browse files
committed
Added Benchmark script. Added ARMASM support.
1 parent 518909e commit 72a0aa3

File tree

10 files changed

+207
-33
lines changed

10 files changed

+207
-33
lines changed

.github/workflows/test-configs.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,28 +70,28 @@ jobs:
7070
with:
7171
arch: arm
7272
config-file: ./config/examples/imx-rt1040.config
73-
make-args: PKA=1
73+
make-args: PKA=1 NO_ARM_ASM=1
7474

7575
imx_rt1050_test_pka:
7676
uses: ./.github/workflows/test-build-mcux-sdk.yml
7777
with:
7878
arch: arm
7979
config-file: ./config/examples/imx-rt1050.config
80-
make-args: PKA=1
80+
make-args: PKA=1 NO_ARM_ASM=1
8181

8282
imx_rt1060_test_pka:
8383
uses: ./.github/workflows/test-build-mcux-sdk.yml
8484
with:
8585
arch: arm
8686
config-file: ./config/examples/imx-rt1060.config
87-
make-args: PKA=1
87+
make-args: PKA=1 NO_ARM_ASM=1
8888

8989
imx_rt1064_test_pka:
9090
uses: ./.github/workflows/test-build-mcux-sdk.yml
9191
with:
9292
arch: arm
9393
config-file: ./config/examples/imx-rt1064.config
94-
make-args: PKA=1
94+
make-args: PKA=1 NO_ARM_ASM=1
9595

9696
kinetis_k64f_test:
9797
uses: ./.github/workflows/test-build-mcux-sdk.yml

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ clean:
318318
$(Q)rm -f $(MACHINE_OBJ) $(MAIN_TARGET) $(LSCRIPT)
319319
$(Q)rm -f $(OBJS)
320320
$(Q)rm -f tools/keytools/otp/otp-keystore-gen
321+
$(Q)rm -f .stack_usage
321322
$(Q)$(MAKE) -C test-app -s clean
322323
$(Q)$(MAKE) -C tools/check_config -s clean
323324
$(Q)$(MAKE) -C stage1 -s clean
@@ -385,6 +386,13 @@ line-count-nrf52:
385386
line-count-x86:
386387
cloc --force-lang-def cloc_lang_def.txt src/boot_x86_fsp.c src/boot_x86_fsp_payload.c src/boot_x86_fsp_start.S src/image.c src/keystore.c src/libwolfboot.c src/loader.c src/string.c src/update_disk.c src/x86/ahci.c src/x86/ata.c src/x86/common.c src/x86/gpt.c src/x86/hob.c src/pci.c src/x86/tgl_fsp.c hal/x86_fsp_tgl.c hal/x86_uart.c
387388

389+
stack-usage: wolfboot.bin
390+
$(Q)echo $(STACK_USAGE) > .stack_usage
391+
392+
image-header-size: wolfboot.bin
393+
$(Q)echo $(IMAGE_HEADER_SIZE) > .image_header_size
394+
395+
388396
cppcheck:
389397
cppcheck -f --enable=warning --enable=portability \
390398
--suppress="ctunullpointer" --suppress="nullPointer" \

arch.mk

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ ifeq ($(ARCH),ARM)
188188
ifeq ($(CORTEX_A5),1)
189189
FPU=-mfpu=vfp4-d16
190190
CFLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -static -z noexecstack
191-
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static -z noexecstack -Ttext 0x300000
191+
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static \
192+
-z noexecstack -Ttext 0x300000
192193
# Cortex-A uses boot_arm32.o
193194
OBJS+=src/boot_arm32.o src/boot_arm32_start.o
194195
ifeq ($(NO_ASM),1)
@@ -198,11 +199,37 @@ ifeq ($(CORTEX_A5),1)
198199
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o
199200
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o
200201
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o
201-
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
202+
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
203+
-DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
202204
endif
203205
else
204206
# All others use boot_arm.o
205207
OBJS+=src/boot_arm.o
208+
ifneq ($(NO_ARM_ASM),1)
209+
CORTEXM_ARM_EXTRA_OBJS= \
210+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-aes.o \
211+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.o \
212+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o \
213+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.o \
214+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm.o \
215+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.o \
216+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o \
217+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o \
218+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.o \
219+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.o \
220+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm.o \
221+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.o \
222+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm.o \
223+
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.o
224+
225+
226+
CORTEXM_ARM_THUMB_EXTRA_OBJS= \
227+
./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.o \
228+
./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.o
229+
230+
CORTEXM_ARM_EXTRA_CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
231+
-DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
232+
endif
206233
ifeq ($(CORTEX_M33),1)
207234
CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33
208235
LDFLAGS+=-mcpu=cortex-m33
@@ -212,28 +239,25 @@ else
212239
endif
213240
CFLAGS+=-mcmse
214241
ifeq ($(WOLFCRYPT_TZ),1)
242+
CORTEXM_ARM_EXTRA_OBJS=
243+
CORTEXM_ARM_EXTRA_CFLAGS=
215244
SECURE_OBJS+=./src/wc_callable.o
216245
SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o
217246
CFLAGS+=-DWOLFCRYPT_SECURE_MODE
218247
SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o
219248
endif
220249
endif # TZEN=1
221-
ifeq ($(NO_ASM),1)
222250
ifeq ($(SPMATH),1)
223251
ifeq ($(NO_ASM),1)
224252
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o
225253
else
226254
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
227255
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
256+
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=8
257+
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
228258
endif
229259
endif
230260
else
231-
ifeq ($(SPMATH),1)
232-
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
233-
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
234-
endif
235-
endif
236-
else
237261
ifeq ($(CORTEX_M7),1)
238262
CFLAGS+=-mcpu=cortex-m7
239263
LDFLAGS+=-mcpu=cortex-m7
@@ -243,10 +267,12 @@ else
243267
else
244268
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
245269
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
270+
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
271+
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
246272
endif
247-
endif
273+
endif
248274
else
249-
ifeq ($(CORTEX_M0),1)
275+
ifeq ($(CORTEX_M0),1)
250276
CFLAGS+=-mcpu=cortex-m0
251277
LDFLAGS+=-mcpu=cortex-m0
252278
ifeq ($(SPMATH),1)
@@ -255,6 +281,9 @@ else
255281
else
256282
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM
257283
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o
284+
# TODO: integrate thumb2-asm
285+
#CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=6
286+
#OBJS+=$(CORTEXM_ARM_THUMB_EXTRA_OBJS)
258287
endif
259288
endif
260289
else
@@ -269,6 +298,8 @@ else
269298
ifeq ($(SPMATH),1)
270299
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL
271300
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
301+
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
302+
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
272303
endif
273304
endif
274305
else
@@ -284,6 +315,8 @@ else
284315
ifeq ($(SPMATH),1)
285316
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
286317
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
318+
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
319+
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
287320
endif
288321
endif
289322
endif

docs/compile.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,28 @@ By default, wolfBoot is compiled for ARM Cortex-M3/4/7. To compile for Cortex-M0
4242

4343
`CORTEX_M0=1`
4444

45+
### Speed vs. size
46+
47+
On a number of targets, algorithms may be optimized automatically to use assembly
48+
optimizations. To disable assembly optimizations, use `NO_ASM=1`. This option will
49+
produce smaller code, but will also increase the boot time.
50+
51+
ARM-specific assembly optimizations affecting hash and symmetric-key ciphers can be
52+
disabled with the option `NO_ARM_ASM=1`. This is useful for example when you want
53+
to use SP math optimizations for key verification, but exclude SHA2/AES optimizations
54+
to save some space.
55+
56+
#### Example: ECC256 + SHA256 on STM32H7
57+
58+
Benchmark of footprint vs. boot time (SHA of a 100KB image + signature verification)
59+
60+
| Description | Selected options | wolfBoot size (B) | Boot time (s) |
61+
|-------------|------------------|-------------------|---------------|
62+
| Full ECC256 assembly optimizations. Fastest. | `SIGN=ECC256` | 21836 | .583 |
63+
| Optimize ECC only (SP math assembly only) | `SIGN=ECC256 NO_ARM_ASM=1` | 18624 | .760 |
64+
| No assembly optimizations (smallest) | `SIGN=ECC256 NO_ASM=1` | 14416 | 3.356 |
65+
66+
4567
### Flash partitions
4668

4769
The file [include/target.h](../include/target.h) is generated according to the configured flash geometry,

include/user_settings.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ extern int tolower(int c);
7575
# define ED25519_SMALL
7676
# define NO_ED25519_SIGN
7777
# define NO_ED25519_EXPORT
78-
# define WOLFSSL_SHA512
7978
# define USE_SLOW_SHA512
79+
# define WOLFSSL_SHA512
8080
#endif
8181

8282
/* ED448 and SHA3/SHAKE256 */
@@ -267,6 +267,9 @@ extern int tolower(int c);
267267
!defined(WOLFCRYPT_SECURE_MODE)
268268
# define NO_SHA256
269269
# endif
270+
#ifndef WOLFSSL_SHA512
271+
#define WOLFSSL_SHA512
272+
#endif
270273
#endif
271274

272275
/* If SP math is enabled determine word size */
@@ -499,4 +502,8 @@ extern int tolower(int c);
499502

500503
#endif /* WOLFBOOT_PKCS11_APP */
501504

505+
#ifndef XTOLOWER
506+
#define XTOLOWER(x) (x)
507+
#endif
508+
502509
#endif /* !_WOLFBOOT_USER_SETTINGS_H_ */

options.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ ifeq ($(SIGN),XMSS)
448448
ifeq ($(WOLFBOOT_SMALL_STACK),1)
449449
$(error WOLFBOOT_SMALL_STACK with XMSS not supported)
450450
else
451-
STACK_USAGE=2720
451+
STACK_USAGE=9352
452452
endif
453453
endif
454454

test-app/app_stm32h7.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ void uart_print(const char *s)
362362
}
363363
}
364364

365+
#define FILLER_SIZE (100 * 1024)
366+
static volatile uint8_t filler_data[FILLER_SIZE] = { 0x01, 0x02, 0x03 };
367+
365368
void main(void)
366369
{
367370
uint8_t firmware_version = 0;
@@ -373,6 +376,7 @@ void main(void)
373376
if (FIRMWARE_A)
374377
ld3_write(LED_INIT);
375378

379+
filler_data[FILLER_SIZE - 1] = 0xAA;
376380
/* LED Indicator of successful UART initialization. SUCCESS = ON, FAIL = OFF */
377381
if (uart_setup(115200) < 0)
378382
ld2_write(LED_OFF);

tools/config.mk

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ ifeq ($(ARCH),)
2323
CORTEX_M7?=0
2424
CORTEX_M3?=0
2525
NO_ASM?=0
26+
NO_ARM_ASM?=0
2627
EXT_FLASH?=0
2728
SPI_FLASH?=0
2829
QSPI_FLASH?=0
@@ -104,5 +105,6 @@ CONFIG_VARS:= ARCH TARGET SIGN HASH MCUXSDK MCUXPRESSO MCUXPRESSO_CPU MCUXPRESSO
104105
NXP_CUSTOM_DCD NXP_CUSTOM_DCD_OBJS \
105106
FLASH_OTP_KEYSTORE \
106107
KEYVAULT_OBJ_SIZE \
107-
KEYVAULT_MAX_ITEMS
108+
KEYVAULT_MAX_ITEMS \
109+
NO_ARM_ASM
108110

tools/scripts/benchmark.sh

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/bin/bash
2+
#
3+
function run_on_board() {
4+
# GPIO2: RST
5+
# GPIO4: BOOT (input)
6+
7+
if ! (st-flash reset &>/dev/null); then
8+
echo -n "No data."
9+
else
10+
sleep 1
11+
st-flash --connect-under-reset write factory.bin 0x8000000 &>/dev/null
12+
sleep .2
13+
echo "2" > /sys/class/gpio/export 2>/dev/null
14+
echo "out" > /sys/class/gpio/gpio2/direction
15+
echo "1" > /sys/class/gpio/gpio2/value # Release reset
16+
echo "0" > /sys/class/gpio/gpio2/value # Keep reset low
17+
sleep 1
18+
echo -n " | "
19+
echo "1" > /sys/class/gpio/gpio2/value # Release reset
20+
START=`date +%s.%N`
21+
while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
22+
sleep .01
23+
done
24+
while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
25+
sleep .01
26+
done
27+
END=`date +%s.%N`
28+
echo "scale=3; $END/1 - $START/1 "| bc
29+
echo "in" > /sys/class/gpio/gpio2/direction
30+
echo "2" >/sys/class/gpio/unexport 2>/dev/null
31+
fi
32+
}
33+
34+
function set_benchmark {
35+
NAME=$1
36+
shift
37+
CONFIG=$@
38+
# Name
39+
echo -n "| "
40+
echo -n $NAME
41+
echo -n " | "
42+
# Configuration
43+
echo -n $CONFIG | tr -d '\n'
44+
echo -n " | "
45+
make clean &>/dev/null
46+
make keysclean &>/dev/null
47+
make $@ factory.bin &>/dev/null || make $@ factory.bin
48+
make $@ stack-usage &>/dev/null
49+
make $@ image-header-size &>/dev/null
50+
# Bootloader size
51+
echo -n `ls -l wolfboot.bin | cut -d " " -f 5 | tr -d '\n'`
52+
echo -n " | "
53+
# Stack size
54+
cat .stack_usage | tr -d '\n'
55+
echo -n " | "
56+
# Image header size
57+
cat .image_header_size | tr -d '\n'
58+
# Boot time
59+
run_on_board 2>&1 | tr -d '\n'
60+
echo " |"
61+
}
62+
63+
echo "4" > /sys/class/gpio/export 2>/dev/null
64+
echo "2" > /sys/class/gpio/unexport 2>/dev/null
65+
make keytools &>/dev/null
66+
cp config/examples/stm32h7.config .config
67+
echo "in" > /sys/class/gpio/gpio4/direction
68+
# Output benchmark results in a Markdown table
69+
echo "| Name | Configuration | Bootloader size | Stack size | Image header size | Boot time |"
70+
echo "|------|---------------|-----------------|------------|-------------------|-----------|"
71+
72+
73+
set_benchmark "SHA2 only" SIGN=NONE
74+
set_benchmark "SHA384 only" SIGN=NONE HASH=SHA384
75+
set_benchmark "SHA3 only" SIGN=NONE HASH=SHA3
76+
set_benchmark "SHA2 only,small" SIGN=NONE NO_ASM=1
77+
set_benchmark "rsa2048" SIGN=RSA2048
78+
set_benchmark "rsa3072" SIGN=RSA3072
79+
set_benchmark "rsa4096" SIGN=RSA4096
80+
set_benchmark "rsa4096 with sha384" SIGN=RSA4096 HASH=SHA384
81+
set_benchmark "ecdsa256" SIGN=ECC256
82+
set_benchmark "ecdsa384" SIGN=ECC384
83+
set_benchmark "ecdsa521" SIGN=ECC521
84+
# Row label matches the curve actually built (SIGN=ECC384).
set_benchmark "ecdsa384 with small stack" SIGN=ECC384 WOLFBOOT_SMALL_STACK=1
85+
# arch.mk tests SPMATH (not SP_MATH), so SP_MATH=0 was presumably ignored and
# this row measured the default math backend. Label matches SIGN=ECC384.
set_benchmark "ecdsa384 with fast math" SIGN=ECC384 SPMATH=0
86+
set_benchmark "ecdsa256, no asm" SIGN=ECC256 NO_ASM=1
87+
set_benchmark "ecdsa384, no asm" SIGN=ECC384 NO_ASM=1
88+
set_benchmark "ecdsa521, no asm" SIGN=ECC521 NO_ASM=1
89+
set_benchmark "ecdsa384 with sha384" SIGN=ECC384 HASH=SHA384
90+
set_benchmark "ed25519 with sha384, small" SIGN=ED25519 HASH=SHA384 NO_ASM=1
91+
set_benchmark "ed25519 fast" SIGN=ED25519 NO_ASM=0
92+
set_benchmark "ed448" SIGN=ED448
93+
set_benchmark "ML_DSA-44" SIGN=ML_DSA ML_DSA_LEVEL=2 IMAGE_SIGNATURE_SIZE=2420 IMAGE_HEADER_SIZE=8192
94+
set_benchmark "ML_DSA-65" SIGN=ML_DSA ML_DSA_LEVEL=3 IMAGE_SIGNATURE_SIZE=3309 IMAGE_HEADER_SIZE=8192
95+
set_benchmark "ML_DSA-87" SIGN=ML_DSA ML_DSA_LEVEL=5 IMAGE_SIGNATURE_SIZE=4627 IMAGE_HEADER_SIZE=12288
96+
set_benchmark "LMS 1-10-8" SIGN=LMS LMS_LEVELS=1 LMS_HEIGHT=10 LMS_WINTERNITZ=8 IMAGE_HEADER_SIZE=4096 IMAGE_SIGNATURE_SIZE=1456
97+
set_benchmark "XMSS-SHA2_10_256'" XMSS_PARAMS='XMSS-SHA2_10_256' SIGN=XMSS IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE=8192
98+

0 commit comments

Comments
 (0)