Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NDS Speed Boost #24

Open
wants to merge 8 commits into
base: nds
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions include/tonccpy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//# Stuff you may not have yet.

#ifndef TONCCPY_H
#define TONCCPY_H


#ifdef __cplusplus
extern "C" {
#endif

#include <nds.h>

// Shorthand for unsigned int; used for the size parameters declared below.
typedef unsigned int uint;
// Mask with the low `len` bits set.
// NOTE(review): the shift is performed on a signed int literal, so with a
// 32-bit int `len` must stay below 31 to avoid overflow — confirm callers.
#define BIT_MASK(len) ( (1<<(len))-1 )
//! Replicate a byte into all four bytes of a word (0xAB -> 0xABABABAB).
static inline u32 quad8(u8 x)
{
    // Widen to u32 before replicating: doing `x |= x<<8` on a u8 truncates
    // the shifted copy away, which leaves bytes 1 and 3 of the result empty.
    return (u32)x * 0x01010101u;
}


//# Declarations and inlines.

// Copy routine taking a destination, a source and a size; defined outside this header.
// NOTE(review): presumably the VRAM-safe memcpy counterpart with size in bytes —
// confirm against the implementation.
void tonccpy(void *dst, const void *src, uint size);

// Fill worker used by the toncset wrappers below: takes a 32-bit fill pattern
// and a size that every wrapper passes in bytes. Defined outside this header.
void __toncset(void *dst, u32 fill, uint size);
// Forward declarations of the inline wrappers defined further down.
static inline void toncset(void *dst, u8 src, uint size);
static inline void toncset16(void *dst, u16 src, uint size);
static inline void toncset32(void *dst, u32 src, uint size);


//! VRAM-safe memset, byte version.
/*! \param dst  Destination to fill.
    \param src  Byte value to write.
    \param size Number of bytes to fill.
*/
static inline void toncset(void *dst, u8 src, uint size)
{
    // Expand the byte into the word-sized pattern the worker expects.
    const u32 pattern = quad8(src);

    __toncset(dst, pattern, size);
}

//! VRAM-safe memset, halfword version.
/*! \param dst  Destination to fill.
    \param src  Halfword value to write.
    \param size Number of halfwords to fill (converted to bytes for the worker).
*/
static inline void toncset16(void *dst, u16 src, uint size)
{
    // Widen before shifting: a u16 promotes to signed int, and shifting a
    // value with bit 15 set left by 16 overflows int (undefined behavior).
    const u32 fill = (u32)src | ((u32)src << 16);

    __toncset(dst, fill, size*2);
}

//! VRAM-safe memset, word version.
/*! \param dst  Destination to fill.
    \param src  Word value to write.
    \param size Number of words to fill (converted to bytes for the worker).
*/
static inline void toncset32(void *dst, u32 src, uint size)
{
    const uint byte_count = size * 4;

    __toncset(dst, src, byte_count);
}

#ifdef __cplusplus
}
#endif
#endif
36 changes: 30 additions & 6 deletions lib/src/_Ldtob.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ void _Ldtob(printf_struct *args, u8 type) {
}
err = _Ldunscale(&exp, args);
if (err > 0) {
memcpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3);
#ifdef TARGET_NDS
tonccpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3);
#else
memcpy(args->buff, err == 2 ? "NaN" : "Inf", args->part2_len = 3);
#endif
return;
}
if (err == 0) {
Expand Down Expand Up @@ -212,18 +216,30 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) {
if (px->precision < nsig) {
nsig = px->precision;
}
memcpy(&px->buff[px->part2_len], p, px->part3_len = nsig);
#ifdef TARGET_NDS
tonccpy(&px->buff[px->part2_len], p, px->part3_len = nsig);
#else
memcpy(&px->buff[px->part2_len], p, px->part3_len = nsig);
#endif
px->num_trailing_zeros = px->precision - nsig;
} else if (nsig < xexp) { /* zeros before point */
memcpy(&px->buff[px->part2_len], p, nsig);
#ifdef TARGET_NDS
tonccpy(&px->buff[px->part2_len], p, nsig);
#else
memcpy(&px->buff[px->part2_len], p, nsig);
#endif
px->part2_len += nsig;
px->num_mid_zeros = xexp - nsig;
if (0 < px->precision || px->flags & FLAGS_HASH) {
px->buff[px->part2_len] = point, ++px->part3_len;
}
px->num_trailing_zeros = px->precision;
} else { /* enough digits before point */
memcpy(&px->buff[px->part2_len], p, xexp);
#ifdef TARGET_NDS
tonccpy(&px->buff[px->part2_len], p, xexp);
#else
memcpy(&px->buff[px->part2_len], p, xexp);
#endif
px->part2_len += xexp;
nsig -= xexp;
if (0 < px->precision || px->flags & FLAGS_HASH) {
Expand All @@ -232,7 +248,11 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) {
if (px->precision < nsig) {
nsig = px->precision;
}
memcpy(&px->buff[px->part2_len], p + xexp, nsig);
#ifdef TARGET_NDS
tonccpy(&px->buff[px->part2_len], p + xexp, nsig);
#else
memcpy(&px->buff[px->part2_len], p + xexp, nsig);
#endif
px->part2_len += nsig;
px->num_mid_zeros = px->precision - nsig;
}
Expand All @@ -254,7 +274,11 @@ static void _Genld(printf_struct *px, u8 code, u8 *p, s16 nsig, s16 xexp) {
if (px->precision < --nsig) {
nsig = px->precision;
}
memcpy(&px->buff[px->part2_len], p, nsig);
#ifdef TARGET_NDS
tonccpy(&px->buff[px->part2_len], p, nsig);
#else
memcpy(&px->buff[px->part2_len], p, nsig);
#endif
px->part2_len += nsig;
px->num_mid_zeros = px->precision - nsig;
}
Expand Down
6 changes: 5 additions & 1 deletion lib/src/_Litob.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ void _Litob(printf_struct *args, u8 type) {

args->part2_len = BUFF_LEN - buff_ind;

memcpy(args->buff, buff + buff_ind, args->part2_len);
#ifdef TARGET_NDS
tonccpy(args->buff, buff + buff_ind, args->part2_len);
#else
memcpy(args->buff, buff + buff_ind, args->part2_len);
#endif

if (args->part2_len < args->precision) {
args->num_leading_zeros = args->precision - args->part2_len;
Expand Down
6 changes: 5 additions & 1 deletion lib/src/guMtxF2L.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ void guMtxL2F(float mf[4][4], Mtx *m) {
}
#else
/**
 * Copy sizeof(Mtx) bytes from the float matrix `mf` into `*m`.
 *
 * Exactly one copy is performed per build configuration: tonccpy when
 * TARGET_NDS is defined, memcpy otherwise.
 */
void guMtxF2L(float mf[4][4], Mtx *m) {
#ifdef TARGET_NDS
    tonccpy(m, mf, sizeof(Mtx));
#else
    memcpy(m, mf, sizeof(Mtx));
#endif
}
#endif

Expand Down
6 changes: 5 additions & 1 deletion lib/src/sprintf.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,9 @@ int sprintf(char *dst, const char *fmt, ...) {
}

/**
 * Append `count` bytes from `src` to `dst`.
 *
 * @param dst   Destination buffer; must have room for `count` bytes.
 * @param src   Bytes to copy.
 * @param count Number of bytes to copy.
 * @return Pointer to the byte just past the copied data (dst + count).
 */
char *proutSprintf(char *dst, const char *src, size_t count) {
#ifdef TARGET_NDS
    /* tonccpy is declared as returning void, so its result cannot be used
       to form the return value; compute the end pointer from dst instead. */
    tonccpy(dst, src, count);
    return dst + count;
#else
    return (char *) memcpy(dst, src, count) + count;
#endif
}
12 changes: 10 additions & 2 deletions src/engine/level_script.c
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,11 @@ static void level_cmd_set_terrain_data(void) {
data = segmented_to_virtual(CMD_GET(void *, 4));
size = get_area_terrain_size(data) * sizeof(Collision);
gAreas[sCurrAreaIndex].terrainData = alloc_only_pool_alloc(sLevelPool, size);
memcpy(gAreas[sCurrAreaIndex].terrainData, data, size);
#ifdef TARGET_NDS
tonccpy(gAreas[sCurrAreaIndex].terrainData, data, size);
#else
memcpy(gAreas[sCurrAreaIndex].terrainData, data, size);
#endif
#endif
}
sCurrentCmd = CMD_NEXT;
Expand All @@ -630,7 +634,11 @@ static void level_cmd_set_macro_objects(void) {
len += 4;
}
gAreas[sCurrAreaIndex].macroObjects = alloc_only_pool_alloc(sLevelPool, len * sizeof(MacroObject));
memcpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject));
#ifdef TARGET_NDS
tonccpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject));
#else
memcpy(gAreas[sCurrAreaIndex].macroObjects, data, len * sizeof(MacroObject));
#endif
#endif
}
sCurrentCmd = CMD_NEXT;
Expand Down
2 changes: 1 addition & 1 deletion src/game/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
size -= copySize;
}
#else
memcpy(dest, srcStart, srcEnd - srcStart);
tonccpy(dest, srcStart, srcEnd - srcStart);
#endif
}

Expand Down
10 changes: 9 additions & 1 deletion src/goddard/gd_math.c
Original file line number Diff line number Diff line change
Expand Up @@ -949,13 +949,21 @@ void UNUSED gd_rot_mat_offset(Mat4f *dst, f32 x, f32 y, f32 z, s32 copy) {
}

mag = gd_sqrt_f(SQ(adj) + SQ(opp));
#ifdef TARGET_NDS
c = swiDivide(adj, mag);
s = swiDivide(opp, mag);

vec.x = swiDivide(-y, opp);
vec.y = swiDivide(-x, opp);
vec.z = swiDivide(-z, opp);
#else
c = adj / mag;
s = opp / mag;

vec.x = -y / opp;
vec.y = -x / opp;
vec.z = -z / opp;

#endif
gd_create_rot_matrix(&rot, &vec, s, c);
if (!copy) {
gd_mult_mat4f(dst, &rot, dst);
Expand Down
2 changes: 2 additions & 0 deletions src/nds/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ int main(void) {
main_pool_init(pool, pool + sizeof(pool) / sizeof(pool[0]));
gEffectsMemoryPool = mem_pool_init(0x4000, MEMORY_POOL_LEFT);

setCpuClock(true);

renderer_init();

#ifdef LIBFAT
Expand Down
38 changes: 20 additions & 18 deletions src/nds/nds_renderer.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "nds_renderer.h"

#include "tonccpy.h"

struct Color {
uint8_t r, g, b, a;
};
Expand Down Expand Up @@ -417,7 +419,7 @@ static void g_vtx(Gwords *words) {
const Vtx *vertices = (const Vtx*)words->w1;

// Store vertices in the vertex buffer
memcpy(&vertex_buffer[index - count], vertices, count * sizeof(Vtx));
tonccpy(&vertex_buffer[index - count], vertices, count * sizeof(Vtx));

if (geometry_mode & G_LIGHTING) {
// Recalculate transformed light vectors if the lights or modelview matrix changed
Expand All @@ -436,9 +438,9 @@ static void g_vtx(Gwords *words) {
int s = (lights[i].nx * lights[i].nx + lights[i].ny * lights[i].ny + lights[i].nz * lights[i].nz) >> 8;
if (s > 0) {
s = sqrt_fixed(s);
lights[i].nx = (lights[i].nx << 16) / s;
lights[i].ny = (lights[i].ny << 16) / s;
lights[i].nz = (lights[i].nz << 16) / s;
lights[i].nx = div32((lights[i].nx << 16), s);
lights[i].ny = div32((lights[i].ny << 16), s);
lights[i].nz = div32((lights[i].nz << 16), s);
}
}

Expand Down Expand Up @@ -511,7 +513,7 @@ static void g_texture(Gwords *words) {
// Pop matrices from the modelview stack.
// The pop count is words->w1 divided by 64; the matrix is popped exactly once.
static void g_popmtx(Gwords *words) {
    glMatrixMode(GL_MODELVIEW);
    // The divisor is a constant power of two, which the compiler can reduce
    // to a shift, so a plain division is used rather than a divider call.
    glPopMatrix(words->w1 / 64);
}

static void g_geometrymode(Gwords *words) {
Expand Down Expand Up @@ -600,7 +602,7 @@ static void g_moveword(Gwords *words) {
switch (index) {
case G_MW_NUMLIGHT:
// Set the current number of lights, including the lookat vectors
num_lights = (words->w1 / 24) + 2;
num_lights = div32(words->w1, 24) + 2;
break;

case G_MW_FOG:
Expand Down Expand Up @@ -758,19 +760,19 @@ static void g_rdphalf_2(Gwords *words) {

// Draw one half of the rectangle, using depth hijacking
glTexCoord2t16(s1, t1);
glVertex3v16(x1, y1, (--z_depth) / 6);
glVertex3v16(x1, y1, div32((--z_depth), 6));
glTexCoord2t16(s1, t2);
glVertex3v16(x1, y2, (--z_depth) / 6);
glVertex3v16(x1, y2, div32((--z_depth), 6));
glTexCoord2t16(s2, t1);
glVertex3v16(x2, y1, (--z_depth) / 6);
glVertex3v16(x2, y1, div32((--z_depth), 6));

// Draw the other half of the rectangle, using depth hijacking
glTexCoord2t16(s2, t1);
glVertex3v16(x2, y1, (--z_depth) / 6);
glVertex3v16(x2, y1, div32((--z_depth), 6));
glTexCoord2t16(s1, t2);
glVertex3v16(x1, y2, (--z_depth) / 6);
glVertex3v16(x1, y2, div32((--z_depth), 6));
glTexCoord2t16(s2, t2);
glVertex3v16(x2, y2, (--z_depth) / 6);
glVertex3v16(x2, y2, div32((--z_depth), 6));

// Restore the original matrices
glPopMatrix(1);
Expand Down Expand Up @@ -849,14 +851,14 @@ static void g_fillrect(Gwords *words) {
const int16_t y2 = -((((words->w0 >> 0) & 0xFFF) + (1 << 2)) * (2 << 12) / (240 << 2) - (1 << 12));

// Draw one half of the rectangle, using depth hijacking
glVertex3v16(x1, y1, (--z_depth) / 6);
glVertex3v16(x1, y2, (--z_depth) / 6);
glVertex3v16(x2, y1, (--z_depth) / 6);
glVertex3v16(x1, y1, div32((--z_depth), 6));
glVertex3v16(x1, y2, div32((--z_depth), 6));
glVertex3v16(x2, y1, div32((--z_depth), 6));

// Draw the other half of the rectangle, using depth hijacking
glVertex3v16(x2, y1, (--z_depth) / 6);
glVertex3v16(x1, y2, (--z_depth) / 6);
glVertex3v16(x2, y2, (--z_depth) / 6);
glVertex3v16(x2, y1, div32((--z_depth), 6));
glVertex3v16(x1, y2, div32((--z_depth), 6));
glVertex3v16(x2, y2, div32((--z_depth), 6));

// Restore the original matrices
glMatrixMode(GL_PROJECTION);
Expand Down
Loading