From 15e690726d456f271fca586dc6dba4390d084201 Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Sun, 6 Nov 2022 01:49:18 -0400 Subject: [PATCH 1/8] NDS Speed Boost --- src/goddard/gd_math.c | 12 ++++++------ src/nds/main.c | 2 ++ src/nds/nds_renderer.c | 36 ++++++++++++++++++------------------ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/goddard/gd_math.c b/src/goddard/gd_math.c index baf43dd825..0a2b7f5c04 100644 --- a/src/goddard/gd_math.c +++ b/src/goddard/gd_math.c @@ -178,7 +178,7 @@ void gd_create_origin_lookat(Mat4f *mtx, struct GdVec3f *vec, f32 roll) { gd_set_identity_mat4(mtx); if (hMag != 0.0f) { - invertedHMag = 1.0f / hMag; + invertedHMag = swiDivide(1.0f, hMag); (*mtx)[0][0] = ((-unit.z * c) - (s * unit.y * unit.x)) * invertedHMag; (*mtx)[1][0] = ((unit.z * s) - (c * unit.y * unit.x)) * invertedHMag; (*mtx)[2][0] = -unit.x; @@ -949,12 +949,12 @@ void UNUSED gd_rot_mat_offset(Mat4f *dst, f32 x, f32 y, f32 z, s32 copy) { } mag = gd_sqrt_f(SQ(adj) + SQ(opp)); - c = adj / mag; - s = opp / mag; + c = swiDivide(adj, mag); + s = swiDivide(opp, mag); - vec.x = -y / opp; - vec.y = -x / opp; - vec.z = -z / opp; + vec.x = swiDivide(-y, opp); + vec.y = swiDivide(-x, opp); + vec.z = swiDivide(-z, opp); gd_create_rot_matrix(&rot, &vec, s, c); if (!copy) { diff --git a/src/nds/main.c b/src/nds/main.c index 571f94c87d..21e1b6ad9d 100644 --- a/src/nds/main.c +++ b/src/nds/main.c @@ -46,6 +46,8 @@ int main(void) { main_pool_init(pool, pool + sizeof(pool) / sizeof(pool[0])); gEffectsMemoryPool = mem_pool_init(0x4000, MEMORY_POOL_LEFT); + setCpuClock(true); + renderer_init(); #ifdef LIBFAT diff --git a/src/nds/nds_renderer.c b/src/nds/nds_renderer.c index 5d05ce323b..7f2fc55d37 100644 --- a/src/nds/nds_renderer.c +++ b/src/nds/nds_renderer.c @@ -417,7 +417,7 @@ static void g_vtx(Gwords *words) { const Vtx *vertices = (const Vtx*)words->w1; // Store vertices in the vertex buffer - memcpy(&vertex_buffer[index - count], vertices, count * sizeof(Vtx)); + swiCopy(vertices, &vertex_buffer[index - count], sizeof(Vtx) * 8); if (geometry_mode & G_LIGHTING) { // Recalculate transformed light vectors if the lights or modelview matrix changed @@ -436,9 +436,9 @@ static void g_vtx(Gwords *words) { int s = (lights[i].nx * lights[i].nx + lights[i].ny * lights[i].ny + lights[i].nz * lights[i].nz) >> 8; if (s > 0) { s = sqrt_fixed(s); - lights[i].nx = (lights[i].nx << 16) / s; - lights[i].ny = (lights[i].ny << 16) / s; - lights[i].nz = (lights[i].nz << 16) / s; + lights[i].nx = div32((lights[i].nx << 16), s); + lights[i].ny = div32((lights[i].ny << 16), s); + lights[i].nz = div32((lights[i].nz << 16), s); } } @@ -511,7 +511,7 @@ static void g_texture(Gwords *words) { static void g_popmtx(Gwords *words) { // Pop matrices from the modelview stack glMatrixMode(GL_MODELVIEW); - glPopMatrix(words->w1 / 64); + glPopMatrix(div32(words->w1, 64)); } static void g_geometrymode(Gwords *words) { @@ -600,7 +600,7 @@ static void g_moveword(Gwords *words) { switch (index) { case G_MW_NUMLIGHT: // Set the current number of lights, including the lookat vectors - num_lights = (words->w1 / 24) + 2; + num_lights = div32(words->w1, 24) + 2; break; case G_MW_FOG: @@ -758,19 +758,19 @@ static void g_rdphalf_2(Gwords *words) { // Draw one half of the rectangle, using depth hijacking glTexCoord2t16(s1, t1); - glVertex3v16(x1, y1, (--z_depth) / 6); + glVertex3v16(x1, y1, div32((--z_depth), 6)); glTexCoord2t16(s1, t2); - glVertex3v16(x1, y2, (--z_depth) / 6); + glVertex3v16(x1, y2, div32((--z_depth), 6)); glTexCoord2t16(s2, t1); - glVertex3v16(x2, y1, (--z_depth) / 6); + glVertex3v16(x2, y1, div32((--z_depth), 6)); // Draw the other half of the rectangle, using depth hijacking glTexCoord2t16(s2, t1); - glVertex3v16(x2, y1, (--z_depth) / 6); + glVertex3v16(x2, y1, div32((--z_depth), 6)); glTexCoord2t16(s1, t2); - glVertex3v16(x1, y2, (--z_depth) / 6); + glVertex3v16(x1, y2, div32((--z_depth), 6)); glTexCoord2t16(s2, t2); - glVertex3v16(x2, y2, (--z_depth) / 6); + glVertex3v16(x2, y2, div32((--z_depth), 6)); // Restore the original matrices glPopMatrix(1); @@ -849,14 +849,14 @@ static void g_fillrect(Gwords *words) { const int16_t y2 = -((((words->w0 >> 0) & 0xFFF) + (1 << 2)) * (2 << 12) / (240 << 2) - (1 << 12)); // Draw one half of the rectangle, using depth hijacking - glVertex3v16(x1, y1, (--z_depth) / 6); - glVertex3v16(x1, y2, (--z_depth) / 6); - glVertex3v16(x2, y1, (--z_depth) / 6); + glVertex3v16(x1, y1, div32((--z_depth), 6)); + glVertex3v16(x1, y2, div32((--z_depth), 6)); + glVertex3v16(x2, y1, div32((--z_depth), 6)); // Draw the other half of the rectangle, using depth hijacking - glVertex3v16(x2, y1, (--z_depth) / 6); - glVertex3v16(x1, y2, (--z_depth) / 6); - glVertex3v16(x2, y2, (--z_depth) / 6); + glVertex3v16(x2, y1, div32((--z_depth), 6)); + glVertex3v16(x1, y2, div32((--z_depth), 6)); + glVertex3v16(x2, y2, div32((--z_depth), 6)); // Restore the original matrices glMatrixMode(GL_PROJECTION); From 2db082c9a14ed03694839ac18a815cb19c00b694 Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Sun, 6 Nov 2022 01:32:32 -0500 Subject: [PATCH 2/8] Use swiFastCopy instead --- src/nds/nds_renderer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nds/nds_renderer.c b/src/nds/nds_renderer.c index 7f2fc55d37..f515a4e8d1 100644 --- a/src/nds/nds_renderer.c +++ b/src/nds/nds_renderer.c @@ -417,7 +417,7 @@ static void g_vtx(Gwords *words) { const Vtx *vertices = (const Vtx*)words->w1; // Store vertices in the vertex buffer - swiCopy(vertices, &vertex_buffer[index - count], sizeof(Vtx) * 8); + swiFastCopy(vertices, &vertex_buffer[index - count], sizeof(Vtx) * 4); if (geometry_mode & G_LIGHTING) { // Recalculate transformed light vectors if the lights or modelview matrix changed From 20eed3456de48fbeb6f5b98f82abb3a1ab22ad8b Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Sun, 6 Nov 2022 15:45:46 -0500 Subject: [PATCH 3/8] swiDivide does not work on float, and add N64 ifdef. --- src/goddard/gd_math.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/goddard/gd_math.c b/src/goddard/gd_math.c index 0a2b7f5c04..acc324365d 100644 --- a/src/goddard/gd_math.c +++ b/src/goddard/gd_math.c @@ -178,7 +178,7 @@ void gd_create_origin_lookat(Mat4f *mtx, struct GdVec3f *vec, f32 roll) { gd_set_identity_mat4(mtx); if (hMag != 0.0f) { - invertedHMag = swiDivide(1.0f, hMag); + invertedHMag = 1.0f / hMag; (*mtx)[0][0] = ((-unit.z * c) - (s * unit.y * unit.x)) * invertedHMag; (*mtx)[1][0] = ((unit.z * s) - (c * unit.y * unit.x)) * invertedHMag; (*mtx)[2][0] = -unit.x; @@ -949,13 +949,21 @@ void UNUSED gd_rot_mat_offset(Mat4f *dst, f32 x, f32 y, f32 z, s32 copy) { } mag = gd_sqrt_f(SQ(adj) + SQ(opp)); + #ifdef TARGET_N64 + c = adj / mag; + s = opp / mag; + + vec.x = -y / opp; + vec.y = -x / opp; + vec.z = -z / opp; + else c = swiDivide(adj, mag); s = swiDivide(opp, mag); vec.x = swiDivide(-y, opp); vec.y = swiDivide(-x, opp); vec.z = swiDivide(-z, opp); - + #endif gd_create_rot_matrix(&rot, &vec, s, c); if (!copy) { gd_mult_mat4f(dst, &rot, dst); From 7b482b2de782a0771bec4ee872cd30a20f80f0ca Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Sun, 6 Nov 2022 15:48:15 -0500 Subject: [PATCH 4/8] Makes more sense to check if NDS --- src/goddard/gd_math.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/goddard/gd_math.c b/src/goddard/gd_math.c index acc324365d..faa06464d2 100644 --- a/src/goddard/gd_math.c +++ b/src/goddard/gd_math.c @@ -949,20 +949,20 @@ void UNUSED gd_rot_mat_offset(Mat4f *dst, f32 x, f32 y, f32 z, s32 copy) { } mag = gd_sqrt_f(SQ(adj) + SQ(opp)); - #ifdef TARGET_N64 - c = adj / mag; - s = opp / mag; - - vec.x = -y / opp; - vec.y = -x / opp; - vec.z = -z / opp; - else + #ifdef TARGET_NDS c = swiDivide(adj, mag); s = swiDivide(opp, mag); vec.x = swiDivide(-y, opp); vec.y = swiDivide(-x, opp); vec.z = swiDivide(-z, opp); + else + c = adj / mag; + s = opp / mag; + + vec.x = -y / opp; + vec.y = -x / opp; + vec.z = -z / opp; #endif gd_create_rot_matrix(&rot, &vec, s, c); if (!copy) { From 51ea94ffb059e3e064cd314521f1374ea5ecd074 Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Sun, 6 Nov 2022 16:06:16 -0500 Subject: [PATCH 5/8] fix else --- src/goddard/gd_math.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/goddard/gd_math.c b/src/goddard/gd_math.c index faa06464d2..d2c497a292 100644 --- a/src/goddard/gd_math.c +++ b/src/goddard/gd_math.c @@ -956,7 +956,7 @@ void UNUSED gd_rot_mat_offset(Mat4f *dst, f32 x, f32 y, f32 z, s32 copy) { vec.x = swiDivide(-y, opp); vec.y = swiDivide(-x, opp); vec.z = swiDivide(-z, opp); - else + #else c = adj / mag; s = opp / mag; From 87d25492b85fc5ec16f2d8178c0fafff7fedb69f Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Mon, 7 Nov 2022 16:51:46 -0500 Subject: [PATCH 6/8] Use tonccpy Thanks to Epicpkmn11 for letting me know of its existence. --- include/tonccpy.h | 43 +++++++++++++ src/nds/nds_renderer.c | 4 +- src/nds/tonccpy.c | 136 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 include/tonccpy.h create mode 100644 src/nds/tonccpy.c diff --git a/include/tonccpy.h b/include/tonccpy.h new file mode 100644 index 0000000000..1be8f92cdf --- /dev/null +++ b/include/tonccpy.h @@ -0,0 +1,43 @@ +//# Stuff you may not have yet. + +#ifndef TONCCPY_H +#define TONCCPY_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef unsigned int uint; +#define BIT_MASK(len) ( (1<<(len))-1 ) +static inline u32 quad8(u8 x) { x |= x<<8; return x | x<<16; } + + +//# Declarations and inlines. + +void tonccpy(void *dst, const void *src, uint size); + +void __toncset(void *dst, u32 fill, uint size); +static inline void toncset(void *dst, u8 src, uint size); +static inline void toncset16(void *dst, u16 src, uint size); +static inline void toncset32(void *dst, u32 src, uint size); + + +//! VRAM-safe memset, byte version. Size in bytes. +static inline void toncset(void *dst, u8 src, uint size) +{ __toncset(dst, quad8(src), size); } + +//! VRAM-safe memset, halfword version. Size in hwords. +static inline void toncset16(void *dst, u16 src, uint size) +{ __toncset(dst, src|src<<16, size*2); } + +//! VRAM-safe memset, word version. Size in words. +static inline void toncset32(void *dst, u32 src, uint size) +{ __toncset(dst, src, size*4); } + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/nds/nds_renderer.c b/src/nds/nds_renderer.c index f515a4e8d1..425fabbba5 100644 --- a/src/nds/nds_renderer.c +++ b/src/nds/nds_renderer.c @@ -6,6 +6,8 @@ #include "nds_renderer.h" +#include "tonccpy.h" + struct Color { uint8_t r, g, b, a; }; @@ -417,7 +419,7 @@ static void g_vtx(Gwords *words) { const Vtx *vertices = (const Vtx*)words->w1; // Store vertices in the vertex buffer - swiFastCopy(vertices, &vertex_buffer[index - count], sizeof(Vtx) * 4); + tonccpy(&vertex_buffer[index - count], vertices, count * sizeof(Vtx)); if (geometry_mode & G_LIGHTING) { // Recalculate transformed light vectors if the lights or modelview matrix changed diff --git a/src/nds/tonccpy.c b/src/nds/tonccpy.c new file mode 100644 index 0000000000..b10e960cb9 --- /dev/null +++ b/src/nds/tonccpy.c @@ -0,0 +1,136 @@ +#include "tonccpy.h" +//# tonccpy.c + +//! VRAM-safe cpy. +/*! This version mimics memcpy in functionality, with + the benefit of working for VRAM as well. It is also + slightly faster than the original memcpy, but faster + implementations can be made. + \param dst Destination pointer. + \param src Source pointer. + \param size Fill-length in bytes. + \note The pointers and size need not be word-aligned. +*/ +void tonccpy(void *dst, const void *src, uint size) +{ + if (size==0 || dst==NULL || src==NULL) + return; + + uint count; + u16 *dst16; // hword destination + u8 *src8; // byte source + + // Ideal case: copy by 4x words. Leaves tail for later. + if ( ((u32)src|(u32)dst)%4==0 && size>=4) + { + u32 *src32= (u32*)src, *dst32= (u32*)dst; + + count= size/4; + uint tmp= count&3; + count /= 4; + + // Duff's Device, good friend! + switch(tmp) { + do { *dst32++ = *src32++; + case 3: *dst32++ = *src32++; + case 2: *dst32++ = *src32++; + case 1: *dst32++ = *src32++; + case 0: ; } while (count--); + } + + // Check for tail + size &= 3; + if (size == 0) + return; + + src8= (u8*)src32; + dst16= (u16*)dst32; + } + else // Unaligned. + { + uint dstOfs= (u32)dst&1; + src8= (u8*)src; + dst16= (u16*)(dst-dstOfs); + + // Head: 1 byte. + if (dstOfs != 0) + { + *dst16= (*dst16 & 0xFF) | *src8++<<8; + dst16++; + if (--size==0) + return; + } + } + + // Unaligned main: copy by 2x byte. + count= size/2; + while (count--) + { + *dst16++ = src8[0] | src8[1]<<8; + src8 += 2; + } + + // Tail: 1 byte. + if (size&1) + *dst16= (*dst16 &~ 0xFF) | *src8; +} +//# toncset.c + +//! VRAM-safe memset, internal routine. +/*! This version mimics memset in functionality, with + the benefit of working for VRAM as well. It is also + slightly faster than the original memset. + \param dst Destination pointer. + \param fill Word to fill with. + \param size Fill-length in bytes. + \note The \a dst pointer and \a size need not be + word-aligned. In the case of unaligned fills, \a fill + will be masked off to match the situation. +*/ +void __toncset(void *dst, u32 fill, uint size) +{ + if (size==0 || dst==NULL) + return; + + uint left= (u32)dst&3; + u32 *dst32= (u32*)(dst-left); + u32 count, mask; + + // Unaligned head. + if (left != 0) + { + // Adjust for very small stint. + if (left+size<4) + { + mask= BIT_MASK(size*8)<<(left*8); + *dst32= (*dst32 &~ mask) | (fill & mask); + return; + } + + mask= BIT_MASK(left*8); + *dst32= (*dst32 & mask) | (fill&~mask); + dst32++; + size -= 4-left; + } + + // Main stint. + count= size/4; + uint tmp= count&3; + count /= 4; + + switch(tmp) { + do { *dst32++ = fill; + case 3: *dst32++ = fill; + case 2: *dst32++ = fill; + case 1: *dst32++ = fill; + case 0: ; } while (count--); + } + + // Tail + size &= 3; + if (size) + { + mask= BIT_MASK(size*8); + *dst32= (*dst32 &~ mask) | (fill & mask); + } +} \ No newline at end of file From 3a82e640d6917cb5d866e4c14fb7dd79e971d5a5 Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Mon, 7 Nov 2022 22:09:31 -0500 Subject: [PATCH 7/8] Use tonccpy in more places --- lib/src/_Ldtob.c | 40 +++++++++++++++++++++++++++----- lib/src/_Litob.c | 10 +++++++- lib/src/guMtxF2L.c | 10 +++++++- lib/src/sprintf.c | 10 +++++++- src/engine/level_script.c | 16 +++++++++++-- src/game/memory.c | 6 ++++- src/nds/ultra_reimplementation.c | 8 ++++--- 7 files changed, 85 insertions(+), 15 deletions(-) diff --git a/lib/src/_Ldtob.c b/lib/src/_Ldtob.c index 3656bd45af..6ea7ba46ac 100644 --- a/lib/src/_Ldtob.c +++ b/lib/src/_Ldtob.c @@ -4,6 +4,10 @@ #include #include "printf.h" +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + #define BUFF_LEN 0x20 static s16 _Ldunscale(s16 *, printf_struct *); @@ -76,7 +80,11 @@ void _Ldtob(printf_struct *args, u8 type) { } err = _Ldunscale(&exp, args); if (err > 0) { - memcpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3); + #ifdef TARGET_NDS + tonccpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3); + #else + memcpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3); + #endif return; } if (err == 0) { @@ -212,10 +220,18 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) { if (px->precision < nsig) { nsig = px->precision; } - memcpy(&px->buff[px->part2_len], p, px->part3_len = nsig); + #ifdef TARGET_NDS + tonccpy(&px->buff[px->part2_len], p, px->part3_len = nsig); + #else + memcpy(&px->buff[px->part2_len], p, px->part3_len = nsig); + #endif px->num_trailing_zeros = px->precision - nsig; } else if (nsig < xexp) { /* zeros before point */ - memcpy(&px->buff[px->part2_len], p, nsig); + #ifdef TARGET_NDS + tonccpy(&px->buff[px->part2_len], p, nsig); + #else + memcpy(&px->buff[px->part2_len], p, nsig); + #endif px->part2_len += nsig; px->num_mid_zeros = xexp - nsig; if (0 < px->precision || px->flags & FLAGS_HASH) { @@ -223,7 +239,11 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) { } px->num_trailing_zeros = px->precision; } else { /* enough digits before point */ - memcpy(&px->buff[px->part2_len], p, xexp); + #ifdef TARGET_NDS + tonccpy(&px->buff[px->part2_len], p, xexp); + #else + memcpy(&px->buff[px->part2_len], p, xexp); + #endif px->part2_len += xexp; nsig -= xexp; if (0 < px->precision || px->flags & FLAGS_HASH) { @@ -232,7 +252,11 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) { if (px->precision < nsig) { nsig = px->precision; } - memcpy(&px->buff[px->part2_len], p + xexp, nsig); + #ifdef TARGET_NDS + tonccpy(&px->buff[px->part2_len], p + xexp, nsig); + #else + memcpy(&px->buff[px->part2_len], p + xexp, nsig); + #endif px->part2_len += nsig; px->num_mid_zeros = px->precision - nsig; } @@ -254,7 +278,11 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) { if (px->precision < --nsig) { nsig = px->precision; } - memcpy(&px->buff[px->part2_len], p, nsig); + #ifdef TARGET_NDS + tonccpy(&px->buff[px->part2_len], p, nsig); + #else + memcpy(&px->buff[px->part2_len], p, nsig); + #endif px->part2_len += nsig; px->num_mid_zeros = px->precision - nsig; } diff --git a/lib/src/_Litob.c b/lib/src/_Litob.c index 58ed081fff..c89f75b92d 100644 --- a/lib/src/_Litob.c +++ b/lib/src/_Litob.c @@ -3,6 +3,10 @@ #include #include "printf.h" +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + #define BUFF_LEN 0x18 static u8 D_80334960[] = "0123456789abcdef"; @@ -44,7 +48,11 @@ void _Litob(printf_struct *args, u8 type) { args->part2_len = BUFF_LEN - buff_ind; - memcpy(args->buff, buff + buff_ind, args->part2_len); + #ifdef TARGET_NDS + tonccpy(args->buff, buff + buff_ind, args->part2_len); + #else + memcpy(args->buff, buff + buff_ind, args->part2_len); + #endif if (args->part2_len < args->precision) { args->num_leading_zeros = args->precision - args->part2_len; diff --git a/lib/src/guMtxF2L.c b/lib/src/guMtxF2L.c index 2b9d93592c..4a265c28dc 100644 --- a/lib/src/guMtxF2L.c +++ b/lib/src/guMtxF2L.c @@ -3,6 +3,10 @@ #include #endif +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + #ifndef GBI_FLOATS void guMtxF2L(float mf[4][4], Mtx *m) { int r, c; @@ -42,7 +46,11 @@ void guMtxL2F(float mf[4][4], Mtx *m) { } #else void guMtxF2L(float mf[4][4], Mtx *m) { - memcpy(m, mf, sizeof(Mtx)); + #ifdef TARGET_NDS + tonccpy(m, mf, sizeof(Mtx)); + #else + memcpy(m, mf, sizeof(Mtx)); + #endif } #endif diff --git a/lib/src/sprintf.c b/lib/src/sprintf.c index 8450fbc530..89752195c9 100644 --- a/lib/src/sprintf.c +++ b/lib/src/sprintf.c @@ -3,6 +3,10 @@ #include "printf.h" #include +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + char *proutSprintf(char *dst, const char *src, size_t count); int sprintf(char *dst, const char *fmt, ...) { @@ -17,5 +21,9 @@ int sprintf(char *dst, const char *fmt, ...) { } char *proutSprintf(char *dst, const char *src, size_t count) { - return (char *) memcpy((u8 *) dst, (u8 *) src, count) + count; + #ifdef TARGET_NDS + return (char *) tonccpy((u8 *) dst, (u8 *) src, count) + count; + #else + return (char *) memcpy((u8 *) dst, (u8 *) src, count) + count; + #endif } diff --git a/src/engine/level_script.c b/src/engine/level_script.c index b0463393d9..0ea1bd07f0 100644 --- a/src/engine/level_script.c +++ b/src/engine/level_script.c @@ -25,6 +25,10 @@ #include "surface_collision.h" #include "surface_load.h" +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + #define CMD_GET(type, offset) (*(type *) (CMD_PROCESS_OFFSET(offset) + (u8 *) sCurrentCmd)) // These are equal @@ -604,7 +608,11 @@ static void level_cmd_set_terrain_data(void) { data = segmented_to_virtual(CMD_GET(void *, 4)); size = get_area_terrain_size(data) * sizeof(Collision); gAreas[sCurrAreaIndex].terrainData = alloc_only_pool_alloc(sLevelPool, size); - memcpy(gAreas[sCurrAreaIndex].terrainData, data, size); + #ifdef TARGET_NDS + tonccpy(gAreas[sCurrAreaIndex].terrainData, data, size); + #else + memcpy(gAreas[sCurrAreaIndex].terrainData, data, size); + #endif #endif } sCurrentCmd = CMD_NEXT; @@ -630,7 +638,11 @@ static void level_cmd_set_macro_objects(void) { len += 4; } gAreas[sCurrAreaIndex].macroObjects = alloc_only_pool_alloc(sLevelPool, len * sizeof(MacroObject)); - memcpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject)); + #ifdef TARGET_NDS + tonccpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject)); + #else + memcpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject)); + #endif #endif } sCurrentCmd = CMD_NEXT; diff --git a/src/game/memory.c b/src/game/memory.c index 83f55f7c3b..9835c3de10 100644 --- a/src/game/memory.c +++ b/src/game/memory.c @@ -15,6 +15,10 @@ #include "segment_symbols.h" #include "segments.h" +#ifdef TARGET_NDS +#include "tonccpy.h" +#endif + // round up to the next multiple #define ALIGN4(val) (((val) + 0x3) & ~0x3) #define ALIGN8(val) (((val) + 0x7) & ~0x7) @@ -263,7 +267,7 @@ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { size -= copySize; } #else - memcpy(dest, srcStart, srcEnd - srcStart); + tonccpy(dest, srcStart, srcEnd - srcStart); #endif } diff --git a/src/nds/ultra_reimplementation.c b/src/nds/ultra_reimplementation.c index 2b28ea849e..ea1b27e938 100644 --- a/src/nds/ultra_reimplementation.c +++ b/src/nds/ultra_reimplementation.c @@ -7,6 +7,8 @@ #include #endif +#include "tonccpy.h" + extern OSMgrArgs piMgrArgs; u64 osClockRate = 62500000; @@ -14,7 +16,7 @@ u64 osClockRate = 62500000; s32 osPiStartDma(UNUSED OSIoMesg *mb, UNUSED s32 priority, UNUSED s32 direction, uintptr_t devAddr, void *vAddr, size_t nbytes, UNUSED OSMesgQueue *mq) { - memcpy(vAddr, (const void *) devAddr, nbytes); + tonccpy(vAddr, (const void *) devAddr, nbytes); return 0; } @@ -151,7 +153,7 @@ s32 osEepromLongRead(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) return -1; } if (fread(content, 1, 512, fp) == 512) { - memcpy(buffer, content + address * 8, nbytes); + tonccpy(buffer, content + address * 8, nbytes); ret = 0; } fclose(fp); @@ -164,7 +166,7 @@ s32 osEepromLongWrite(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes if (address != 0 || nbytes != 512) { osEepromLongRead(mq, 0, content, 512); } - memcpy(content + address * 8, buffer, nbytes); + tonccpy(content + address * 8, buffer, nbytes); #ifdef TARGET_WEB EM_ASM({ From e06a922cad2046962c87617bad6a7921d5a7cfe2 Mon Sep 17 00:00:00 2001 From: Devin <70994866+RetroGamer02@users.noreply.github.com> Date: Mon, 7 Nov 2022 22:18:12 -0500 Subject: [PATCH 8/8] Fixed some include errors --- lib/src/_Ldtob.c | 4 ---- lib/src/_Litob.c | 4 ---- lib/src/guMtxF2L.c | 4 ---- lib/src/sprintf.c | 4 ---- src/engine/level_script.c | 4 ---- src/game/memory.c | 4 ---- src/nds/ultra_reimplementation.c | 2 -- 7 files changed, 26 deletions(-) diff --git a/lib/src/_Ldtob.c b/lib/src/_Ldtob.c index 6ea7ba46ac..7bf50da2ff 100644 --- a/lib/src/_Ldtob.c +++ b/lib/src/_Ldtob.c @@ -4,10 +4,6 @@ #include #include "printf.h" -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - #define BUFF_LEN 0x20 static s16 _Ldunscale(s16 *, printf_struct *); diff --git a/lib/src/_Litob.c b/lib/src/_Litob.c index c89f75b92d..526efefcd4 100644 --- a/lib/src/_Litob.c +++ b/lib/src/_Litob.c @@ -3,10 +3,6 @@ #include #include "printf.h" -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - #define BUFF_LEN 0x18 static u8 D_80334960[] = "0123456789abcdef"; diff --git a/lib/src/guMtxF2L.c b/lib/src/guMtxF2L.c index 4a265c28dc..11fe0da9e1 100644 --- a/lib/src/guMtxF2L.c +++ b/lib/src/guMtxF2L.c @@ -3,10 +3,6 @@ #include #endif -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - #ifndef GBI_FLOATS void guMtxF2L(float mf[4][4], Mtx *m) { int r, c; diff --git a/lib/src/sprintf.c b/lib/src/sprintf.c index 89752195c9..7ce054f532 100644 --- a/lib/src/sprintf.c +++ b/lib/src/sprintf.c @@ -3,10 +3,6 @@ #include "printf.h" #include -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - char *proutSprintf(char *dst, const char *src, size_t count); int sprintf(char *dst, const char *fmt, ...) { diff --git a/src/engine/level_script.c b/src/engine/level_script.c index 0ea1bd07f0..92ebbd4318 100644 --- a/src/engine/level_script.c +++ b/src/engine/level_script.c @@ -25,10 +25,6 @@ #include "surface_collision.h" #include "surface_load.h" -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - #define CMD_GET(type, offset) (*(type *) (CMD_PROCESS_OFFSET(offset) + (u8 *) sCurrentCmd)) // These are equal diff --git a/src/game/memory.c b/src/game/memory.c index 9835c3de10..661000f3b8 100644 --- a/src/game/memory.c +++ b/src/game/memory.c @@ -15,10 +15,6 @@ #include "segment_symbols.h" #include "segments.h" -#ifdef TARGET_NDS -#include "tonccpy.h" -#endif - // round up to the next multiple #define ALIGN4(val) (((val) + 0x3) & ~0x3) #define ALIGN8(val) (((val) + 0x7) & ~0x7) diff --git a/src/nds/ultra_reimplementation.c b/src/nds/ultra_reimplementation.c index ea1b27e938..c3821570c7 100644 --- a/src/nds/ultra_reimplementation.c +++ b/src/nds/ultra_reimplementation.c @@ -7,8 +7,6 @@ #include #endif -#include "tonccpy.h" - extern OSMgrArgs piMgrArgs; u64 osClockRate = 62500000;