Skip to content

Commit 3af4297

Browse files
Optimize fill
1 parent 58c249f commit 3af4297

File tree

1 file changed

+33
-20
lines changed

1 file changed

+33
-20
lines changed

components/hub75/src/platforms/i2s/i2s_dma.cpp

Lines changed: 33 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ static constexpr uint16_t BCM_BIT_MASKS[12] = {0x0001, 0x0002, 0x0004, 0x0008, 0
7979
// In 16-bit parallel mode with tx_fifo_mod=1, the FIFO outputs 16-bit words in swapped pairs.
8080
// The FIFO reads 32-bit words from memory and outputs them as two 16-bit chunks in reversed order.
8181
// XOR with 1 swaps odd/even pairs (0↔1, 2↔3, etc.). ESP32-S2 doesn't need adjustment.
82-
static HUB75_CONST inline constexpr uint16_t fifo_adjust_x(uint16_t x) {
82+
__attribute__((always_inline)) HUB75_CONST static inline constexpr uint16_t fifo_adjust_x(uint16_t x) {
8383
#if defined(CONFIG_IDF_TARGET_ESP32)
8484
return x ^ 1;
8585
#else
@@ -1038,39 +1038,52 @@ HUB75_IRAM void I2sDma::fill(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uin
10381038
uint16_t upper_patterns[HUB75_BIT_DEPTH];
10391039
uint16_t lower_patterns[HUB75_BIT_DEPTH];
10401040
for (int bit = 0; bit < bit_depth_; bit++) {
1041-
const uint16_t mask = (1 << bit);
1041+
const uint16_t mask = BCM_BIT_MASKS[bit];
10421042
upper_patterns[bit] = ((r_corrected & mask) ? (1 << R1_BIT) : 0) | ((g_corrected & mask) ? (1 << G1_BIT) : 0) |
10431043
((b_corrected & mask) ? (1 << B1_BIT) : 0);
10441044
lower_patterns[bit] = ((r_corrected & mask) ? (1 << R2_BIT) : 0) | ((g_corrected & mask) ? (1 << G2_BIT) : 0) |
10451045
((b_corrected & mask) ? (1 << B2_BIT) : 0);
10461046
}
10471047

1048-
// Fill loop - coordinate transforms still needed per-pixel
1048+
// Pre-compute values for inner loop
1049+
const size_t bit_plane_stride = dma_width_ * 2;
1050+
const bool identity_transform = (rotation_ == Hub75Rotation::ROTATE_0) && !needs_layout_remap_ && !needs_scan_remap_;
1051+
1052+
// Fill loop
10491053
for (uint16_t dy = 0; dy < h; dy++) {
10501054
for (uint16_t dx = 0; dx < w; dx++) {
10511055
uint16_t px = x + dx;
10521056
uint16_t py = y + dy;
1057+
uint16_t row;
1058+
bool is_lower;
10531059

1054-
// Coordinate transformation pipeline (rotation + layout + scan remapping)
1055-
auto transformed = transform_coordinate(px, py, rotation_, needs_layout_remap_, needs_scan_remap_, layout_,
1056-
scan_wiring_, panel_width_, panel_height_, layout_rows_, layout_cols_,
1057-
virtual_width_, virtual_height_, dma_width_, num_rows_);
1058-
px = fifo_adjust_x(transformed.x);
1059-
const uint16_t row = transformed.row;
1060-
const bool is_lower = transformed.is_lower;
1061-
1062-
// Update all bit planes using pre-computed patterns
1063-
for (int bit = 0; bit < bit_depth_; bit++) {
1064-
uint16_t *buf = (uint16_t *) (target_buffers[row].data + (bit * dma_width_ * 2));
1065-
uint16_t word = buf[px]; // Read existing word (preserves control bits)
1066-
1067-
if (is_lower) {
1068-
word = (word & ~RGB_LOWER_MASK) | lower_patterns[bit];
1060+
// Fast path: identity transform (no rotation, standard layout, standard scan)
1061+
if (identity_transform) {
1062+
if (py < num_rows_) {
1063+
row = py;
1064+
is_lower = false;
10691065
} else {
1070-
word = (word & ~RGB_UPPER_MASK) | upper_patterns[bit];
1066+
row = py - num_rows_;
1067+
is_lower = true;
10711068
}
1069+
px = fifo_adjust_x(px);
1070+
} else {
1071+
// Full coordinate transformation pipeline
1072+
auto transformed = transform_coordinate(px, py, rotation_, needs_layout_remap_, needs_scan_remap_, layout_,
1073+
scan_wiring_, panel_width_, panel_height_, layout_rows_, layout_cols_,
1074+
virtual_width_, virtual_height_, dma_width_, num_rows_);
1075+
px = fifo_adjust_x(transformed.x);
1076+
row = transformed.row;
1077+
is_lower = transformed.is_lower;
1078+
}
10721079

1073-
buf[px] = word;
1080+
// Update all bit planes using pre-computed patterns (is_lower hoisted outside loop)
1081+
uint8_t *base_ptr = target_buffers[row].data;
1082+
const uint16_t clear_mask = is_lower ? ~RGB_LOWER_MASK : ~RGB_UPPER_MASK;
1083+
const uint16_t *patterns = is_lower ? lower_patterns : upper_patterns;
1084+
for (int bit = 0; bit < bit_depth_; bit++) {
1085+
uint16_t *buf = (uint16_t *) (base_ptr + (bit * bit_plane_stride));
1086+
buf[px] = (buf[px] & clear_mask) | patterns[bit];
10741087
}
10751088
}
10761089
}

0 commit comments

Comments
 (0)