Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions components/esp_lvgl_port/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ if("usb_host_hid" IN_LIST build_components)
list(APPEND ADD_LIBS idf::usb_host_hid)
endif()

# Include SIMD assembly source code for rendering, only for (9.1.0 <= LVG_version < 9.2.0) and only for esp32 and esp32s3
# Include SIMD assembly source code for rendering, only for (9.1.0 <= LVGL version < 9.2.0) and only for Xtensa targets (esp32, esp32s2, esp32s3)
if((lvgl_ver VERSION_GREATER_EQUAL "9.1.0") AND (lvgl_ver VERSION_LESS "9.2.0"))
if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3)
if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3 OR CONFIG_IDF_TARGET_ESP32S2)
message(VERBOSE "Compiling SIMD")
if(CONFIG_IDF_TARGET_ESP32S3)
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32s3.S) # Select only esp32s3 related files
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_xtensa_pie.S) # Select Xtensa PIE, for esp32s3 target
else()
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_xtensa_base.S) # Select Xtensa Base for esp32, esp32s2 targets
endif()

# Explicitly add all assembly macro files
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <xtensa/config/core-isa.h>
#include "lv_macro_memset.S"

// This is the LVGL ARGB8888 simple fill for the ESP32 and ESP32S2 processors

.section .text
.align 4
.global lv_color_blend_to_argb8888_esp
.type lv_color_blend_to_argb8888_esp,@function

// The function implements the following C code:
// void lv_color_blend_to_argb8888(_lv_draw_sw_blend_fill_dsc_t * dsc);

// Input params
//
// dsc - a2

// typedef struct {
// uint32_t opa; l32i 0
// void * dst_buf; l32i 4
// uint32_t dst_w; l32i 8
// uint32_t dst_h; l32i 12
// uint32_t dst_stride; l32i 16
// const void * src_buf; l32i 20
// uint32_t src_stride; l32i 24
// const lv_opa_t * mask_buf; l32i 28
// uint32_t mask_stride; l32i 32
// } asm_dsc_t;

// Fill a dest_w x dest_h pixel rectangle of an ARGB8888 destination with one color.
//
// Input:   a2 - pointer to the asm_dsc_t descriptor; the fields read here are
//               dst_buf (4), dst_w (8), dst_h (12), dst_stride (16), src_buf (20)
// Output:  a2 - always 1 (LV_RESULT_OK), also when dst_w or dst_h is zero
//
// The color is loaded once from src_buf and its top byte is forced to 0xff before
// it is stored. Rows of 8 or more pixels are written 8 pixels per iteration and
// finished with the macro_memset_mod_* helpers from lv_macro_memset.S; narrower
// rows are written one pixel per iteration.
//
// Two loop flavours are assembled depending on XCHAL_HAVE_LOOPS: the zero-overhead
// loopnez instruction, or a compare-and-branch fallback that uses a14/a15 as the
// byte count / end address of the current row.
//
// NOTE(review): every store is a 32-bit s32i, so dst_buf and dst_stride are
// presumably expected to be 4-byte aligned - confirm against the caller.
lv_color_blend_to_argb8888_esp:

    entry      a1,    32

    l32i.n   a3,    a2,    4                    // a3 - dest_buff
    l32i.n   a4,    a2,    8                    // a4 - dest_w                in uint32_t
    l32i.n   a5,    a2,    12                   // a5 - dest_h                in uint32_t
    l32i.n   a6,    a2,    16                   // a6 - dest_stride           in bytes
    l32i.n   a7,    a2,    20                   // a7 - src_buff (color)
    l32i.n   a8,    a7,    0                    // a8 - color as value
    slli     a11,   a4,    2                    // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w

    beqz     a4,    _zero_matrix_len_check      // Nothing to draw if dest_w (a4) is zero
    beqz     a5,    _zero_matrix_len_check      // Nothing to draw if dest_h (a5) is zero
    movi     a7,    0xff000000                  // opacity mask (a7 is reused, src_buff is no longer needed)
    or       a10,   a7,    a8                   // a10 - color with the top byte forced to 0xff
    sub      a6,    a6,    a11                  // a6 - row padding = dest_stride - dest_w_bytes

    // Check dest_w length
    bltui    a4,    8,     _matrix_width_check  // Rows narrower than 8 pixels take the simple path
    srli     a9,    a4,    3                    // a9 - loop_len = dest_w / 8 (at least 1 here)

#if !XCHAL_HAVE_LOOPS
    slli     a14,   a9,    5                    // a14 = loop_len (a9) * 32, bytes covered by the main loop per row
#endif

    .outer_loop:

#if XCHAL_HAVE_LOOPS
        loopnez  a9,    ._main_loop             // zero-overhead loop (original note: not supported for esp32s2)
#else
        // Init loop parameters
        beqz     a9,    ._main_loop             // Skip the main loop if a9 is 0
        add      a15,   a14,   a3               // a15 = end address of the 32-byte blocks of this row
        .main_loop_done:                        // NOTE: despite its name, this label is the top of the main loop
#endif
            // Main loop body: sets 32 bytes (8 ARGB8888 pixels) per iteration
            s32i.n      a10,  a3,  0            // store the color at dest_buff (a3), offset 0 bytes
            s32i.n      a10,  a3,  4            // store the color at dest_buff (a3), offset 4 bytes
            s32i.n      a10,  a3,  8            // store the color at dest_buff (a3), offset 8 bytes
            s32i.n      a10,  a3,  12           // store the color at dest_buff (a3), offset 12 bytes
            s32i.n      a10,  a3,  16           // store the color at dest_buff (a3), offset 16 bytes
            s32i.n      a10,  a3,  20           // store the color at dest_buff (a3), offset 20 bytes
            s32i.n      a10,  a3,  24           // store the color at dest_buff (a3), offset 24 bytes
            s32i.n      a10,  a3,  28           // store the color at dest_buff (a3), offset 28 bytes
            addi.n      a3,   a3,  32           // advance dest_buff (a3) by 32 bytes
#if !XCHAL_HAVE_LOOPS
        // a3 and a15 are addresses, so the bound check has to be unsigned
        bltu     a3,    a15,   .main_loop_done  // Repeat while dest_buff (a3) is below the end address (a15)
#endif
        ._main_loop:                            // End of the main loop

        // Finish the remaining bytes out of the loop

        // Check modulo 16 of the dest_w_bytes (a11), if - then set 16 bytes (4 ARGB8888 pixels)
        // src_reg a10, dest_buff a3, dest_w_bytes a11
        macro_memset_mod_16 a10, a3, a11, __LINE__

        // Check modulo 8 of the dest_w_bytes (a11), if - then set 8 bytes (2 ARGB8888 pixels)
        // src_reg a10, dest_buff a3, dest_w_bytes a11
        macro_memset_mod_8 a10, a3, a11, __LINE__

        // Check modulo 4 of the dest_w_bytes (a11), if - then set 4 bytes (1 ARGB8888 pixel)
        // src_reg a10, dest_buff a3, dest_w_bytes a11
        macro_memset_mod_4 a10, a3, a11, __LINE__

        add      a3,    a3,    a6               // skip the row padding: dest_buff + (dest_stride - dest_w_bytes)
        addi.n   a5,    a5,    -1               // one row done, decrease the outer loop counter
    bnez     a5,    .outer_loop

    movi.n   a2,    1                           // return LV_RESULT_OK = 1
    retw.n                                      // return

//**********************************************************************************************************************

    // Small matrix width, keep it simple for lengths less than 8 pixels

    _matrix_width_check:

#if !XCHAL_HAVE_LOOPS
    slli     a14,   a4,    2                    // a14 = dest_w (a4) * 4, bytes per row (the loop advances by 4 bytes)
#endif

    .outer_loop_short_matrix:

#if XCHAL_HAVE_LOOPS
        loopnez  a4,    ._main_loop_short_matrix    // zero-overhead loop (original note: not supported for esp32s2)
#else
        // Init loop parameters (dest_w is known to be non-zero here, so no zero check is needed)
        add      a15,   a14,   a3               // a15 = end address of this row
        ._main_loop_short_matrix_done:          // NOTE: despite its name, this label is the top of the loop
#endif
            // Loop body: sets 4 bytes (one ARGB8888 pixel) per iteration
            s32i.n      a10,  a3,  0            // store the color at dest_buff (a3)
            addi.n      a3,   a3,  4            // advance dest_buff (a3) by 4 bytes
#if !XCHAL_HAVE_LOOPS
        // a3 and a15 are addresses, so the bound check has to be unsigned
        bltu     a3,    a15,   ._main_loop_short_matrix_done   // Repeat while dest_buff (a3) is below the end address (a15)
#endif
        ._main_loop_short_matrix:               // End of the short-row loop

        add      a3,    a3,    a6               // skip the row padding: dest_buff + (dest_stride - dest_w_bytes)
        addi.n   a5,    a5,    -1               // one row done, decrease the outer loop counter
    bnez     a5,    .outer_loop_short_matrix

    movi.n   a2,    1                           // return LV_RESULT_OK = 1
    retw.n                                      // return

//**********************************************************************************************************************

    // One of the matrix dimensions is zero, return early
    _zero_matrix_len_check:
    movi.n   a2,    1                           // return LV_RESULT_OK = 1
    retw.n                                      // return
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
Expand Down Expand Up @@ -42,11 +42,13 @@ lv_color_blend_to_argb8888_esp:
l32i.n a8, a7, 0 // a8 - color as value
slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w

movi a7, 0xff000000 // oppactiy mask
or a10, a7, a8 // apply oppacity
beqz a4, _zero_matrix_len_check // Check if dest_w a4 is zero
beqz a5, _zero_matrix_len_check // Check if dest_h a5 is zero
movi a7, 0xff000000 // opacity mask
or a10, a7, a8 // apply opacity

// Check for short lengths
// dest_w should be at least 8, othewise it's not worth using esp32s3 TIE
// dest_w should be at least 8, otherwise it's not worth using esp32s3 TIE
bgei a4, 8, _esp32s3_implementation // Branch if dest_w is greater than or equal to 8
j .lv_color_blend_to_argb8888_esp32_body // Jump to esp32 implementation

Expand Down Expand Up @@ -227,7 +229,7 @@ lv_color_blend_to_argb8888_esp:
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
_dest_buff_aligned_by_1byte:

// Shift q reg, allowing to set 16-byte unaligned adata
// Shift q reg, allowing to set 16-byte unaligned data
wur.sar_byte a15 // apply unalignment to the SAR_BYTE
ee.src.q q2, q0, q1 // shift concat. of q0 and q1 to q2 by SAR_BYTE amount

Expand Down Expand Up @@ -323,3 +325,10 @@ lv_color_blend_to_argb8888_esp:

movi.n a2, 1 // return LV_RESULT_OK = 1
retw.n // return

//**********************************************************************************************************************

// One of the matrix dimensions is zero, return early
_zero_matrix_len_check:
movi.n a2, 1 // return LV_RESULT_OK = 1
retw.n // return
Loading
Loading