Skip to content

Commit 31433bb

Browse files
committed
swars: Remade some of the gpoly assembly inlines
1 parent d3bf510 commit 31433bb

1 file changed

Lines changed: 110 additions & 23 deletions

File tree

src/bflib_render_gpoly.c

Lines changed: 110 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -480,22 +480,54 @@ static inline ubyte __CFADDL__(long x, long y)
480480
return (ulong)(x) > (ulong)(x+y);
481481
}
482482

483+
/**
484+
* whether the subtraction (x-y) of two byte ints would use carry
485+
*/
486+
static inline ubyte __CFSUBB__(sbyte x, sbyte y)
487+
{
488+
return (ubyte)(x) < (ubyte)(y);
489+
}
490+
491+
/**
492+
* whether the subtraction (x-y) of two long ints would use carry
493+
*/
494+
static inline ubyte __CFSUBL__(long x, long y)
495+
{
496+
return (ulong)(x) < (ulong)(y);
497+
}
498+
483499
/**
 * Multiplies two signed 32-bit values and returns bits 15..46 of the 64-bit
 * product, plus a one-unit adjustment.
 *
 * This is a plain C equivalent of the x86 sequence
 *     imul edx ; shl eax,1 ; rcl edx,1 ; mov ax,dx ; rol eax,16 ;
 *     jns skip ; inc eax ; skip:
 * The conditional increment tests the sign flag, and the only instruction in
 * that sequence which defines the sign flag is `shl eax,1` (mov, rcl and rol
 * leave it untouched). The increment therefore happens exactly when bit 31 of
 * the low dword of the doubled product is set.
 *
 * Writing it in C removes the problems of the inline-asm form: the result was
 * bound with "=r" although it is produced in EAX, EDX was modified while
 * declared as a pure input, and the named label would be emitted once per
 * inlined copy of the function.
 *
 * @param a1 First factor.
 * @param a2 Second factor.
 * @return The selected 32 bits of the doubled product, incremented by one
 *         (with 32-bit wrap-around) when the condition above holds.
 */
int gpoly_mul_rot_1(int a1, int a2)
{
    unsigned long long doubled;
    unsigned int ret;

    /* imul + shl/rcl: the 64-bit signed product shifted left by one bit */
    doubled = (unsigned long long)((long long)a1 * a2) << 1;

    /* mov ax,dx + rol eax,16: keep bits 16..47 of the doubled product */
    ret = (unsigned int)(doubled >> 16);

    /* jns/inc: sign flag is bit 31 of the low dword after the shl */
    if ((doubled & 0x80000000ULL) != 0)
        ret++;

    return (int)ret;
}
491516

492517
/**
 * Multiplies two signed 32-bit values and returns bits 16..47 of the 64-bit
 * product, optionally incremented by one.
 *
 * The instruction sequence is unchanged; only the interface to the compiler
 * is corrected:
 *  - the result is produced in EAX, so the output is bound with "=a"
 *    (previously "=r", which let the compiler read any register);
 *  - EDX is overwritten by imul, so it is declared read-write ("+d");
 *  - the branch target is a local numeric label, so several inlined copies
 *    of this function cannot produce duplicate symbols;
 *  - the condition codes are listed as clobbered.
 *
 * NOTE(review): the `jns` tests the sign flag, but no instruction between
 * `imul` and `jns` defines it (mov and rol leave SF untouched) and the Intel
 * manual lists SF as undefined after IMUL. Whether the increment happens
 * therefore depends on the CPU - confirm the intended rounding rule before
 * converting this routine to C.
 *
 * @param a1 First factor (passed in EAX).
 * @param a2 Second factor (passed in EDX).
 * @return Bits 16..47 of a1*a2, plus one when the sign flag was found set.
 */
int gpoly_mul_rot_2(int a1, int a2)
{
    int ret;

    __asm__ __volatile__ (
        "imul   %%edx\n"
        "mov    %%dx,%%ax\n"
        "rol    $0x10,%%eax\n"
        "jns    1f\n"
        "inc    %%eax\n"
        "1:\n"
        : "=a" (ret), "+d" (a2)
        : "0" (a1)
        : "cc");
    return ret;
}
500532

501533
void gpoly_sta_md03(struct gpoly_state *st)
@@ -1203,6 +1235,7 @@ void gpoly_sta_md28(struct gpoly_state *st)
12031235

12041236
int gpoly_stb_drw_pixel(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12051237
{
1238+
#if 1
12061239
int ret;
12071240
int loc2d, loc4c;
12081241
ubyte *loc3b;
@@ -1222,10 +1255,12 @@ int gpoly_stb_drw_pixel(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12221255
*a3b = loc3b - vec_map;
12231256
*a4c = loc4c;
12241257
return ret & 0xFFFF;
1258+
#endif
12251259
}
12261260

12271261
int gpoly_stb_drw_pixel2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12281262
{
1263+
#if 1
12291264
int ret;
12301265
int loc2d, loc4c;
12311266
ubyte *loc3b;
@@ -1244,10 +1279,12 @@ int gpoly_stb_drw_pixel2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12441279
*a3b = loc3b - vec_map;
12451280
*a4c = loc4c;
12461281
return ret & 0xFF;
1282+
#endif
12471283
}
12481284

12491285
void gpoly_stb_drw_incr1a(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12501286
{
1287+
#if 1
12511288
int loc2d, loc4c;
12521289
ubyte *loc3b;
12531290

@@ -1263,10 +1300,12 @@ void gpoly_stb_drw_incr1a(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12631300
*a2d = loc2d;
12641301
*a3b = loc3b - vec_map;
12651302
*a4c = loc4c;
1303+
#endif
12661304
}
12671305

12681306
void gpoly_stb_drw_incr1b(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12691307
{
1308+
#if 1
12701309
int loc2d, loc4c;
12711310
ubyte *loc3b;
12721311

@@ -1281,10 +1320,12 @@ void gpoly_stb_drw_incr1b(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12811320
*a2d = loc2d;
12821321
*a3b = loc3b - vec_map;
12831322
*a4c = loc4c;
1323+
#endif
12841324
}
12851325

12861326
void gpoly_stb_drw_incr2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12871327
{
1328+
#if 1
12881329
int loc2d, loc4c;
12891330
ubyte *loc3b;
12901331

@@ -1300,38 +1341,39 @@ void gpoly_stb_drw_incr2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13001341
*a2d = loc2d;
13011342
*a3b = loc3b - vec_map;
13021343
*a4c = loc4c;
1344+
#endif
13031345
}
13041346

13051347
void gpoly_stb_drw_incr3(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13061348
{
1307-
int loc2d, loc4c;
1308-
ubyte *loc3b;
1309-
ubyte loc_0E4l, loc_0E4h;
1310-
ubyte v25;
1311-
uint v29, v30;
1349+
int a3b_bias;
1350+
ubyte a3b_l, a3b_h;
1351+
ubyte loc_carry;
1352+
int loc_4c, loc_2d, loc_3bh;
13121353

1313-
loc2d = *a2d;
1314-
loc3b = vec_map + *a3b;
1315-
loc4c = *a4c;
1354+
a3b_bias = ((intptr_t)vec_map & 0xFFFF) + *a3b;
13161355

1317-
v25 = ((uint)loc3b & 0xFF) < (st->var_0A8 & 0xFF);
1318-
loc_0E4l = ((uint)loc3b & 0xFF) - (st->var_0A8 & 0xFF);
1319-
v29 = v25 + st->var_0BC;
1320-
v25 = (uint)loc4c < v29;
1321-
loc4c = loc4c - v29;
1322-
v30 = v25 + st->var_0B8;
1323-
v25 = (uint)loc2d < v30;
1324-
loc2d = loc2d - v30;
1325-
loc_0E4h = (((uint)loc3b >> 8) & 0xFF) - (v25 + (st->var_0B4 & 0xFF));
1326-
loc3b = (ubyte *)(((uint)vec_map & ~0xFFFF) | loc_0E4l | (loc_0E4h << 8));
1356+
loc_carry = __CFSUBB__(a3b_bias, st->var_0A8);
1357+
a3b_l = a3b_bias - st->var_0A8;
1358+
loc_4c = *a4c - loc_carry;
13271359

1328-
*a2d = loc2d;
1329-
*a3b = loc3b - vec_map;
1330-
*a4c = loc4c;
1360+
loc_carry = __CFSUBL__(loc_4c, st->var_0BC);
1361+
*a4c = loc_4c - st->var_0BC;
1362+
loc_2d = *a2d - loc_carry;
1363+
1364+
loc_carry = __CFSUBL__(loc_2d, st->var_0B8);
1365+
*a2d = loc_2d - st->var_0B8;
1366+
loc_3bh = ((uint)a3b_bias >> 8) - loc_carry;
1367+
1368+
a3b_h = loc_3bh - st->var_0B4;
1369+
a3b_bias = (a3b_h << 8) | a3b_l;
1370+
1371+
*a3b = a3b_bias - ((intptr_t)vec_map & 0xFFFF);
13311372
}
13321373

13331374
void gpoly_stb_drw_incr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13341375
{
1376+
#if 0
13351377
int loc2d, loc4c;
13361378
ubyte *loc3b;
13371379

@@ -1346,10 +1388,33 @@ void gpoly_stb_drw_incr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13461388
*a2d = loc2d;
13471389
*a3b = loc3b - vec_map;
13481390
*a4c = loc4c;
1391+
#endif
1392+
ubyte a3b_h, a3b_l;
1393+
int a3b_bias;
1394+
ubyte loc_carry;
1395+
int loc_2d, loc_3bh;
1396+
1397+
a3b_bias = ((intptr_t)vec_map & 0xFFFF) + *a3b;
1398+
1399+
a3b_l = a3b_bias;
1400+
1401+
loc_carry = __CFADDL__(*a4c, st->var_0BC);
1402+
*a4c = *a4c + st->var_0BC;
1403+
loc_2d = *a2d + loc_carry;
1404+
1405+
loc_carry = __CFADDL__(loc_2d, st->var_0B8);
1406+
*a2d = loc_2d + st->var_0B8;
1407+
loc_3bh = ((uint)a3b_bias >> 8) + loc_carry;
1408+
1409+
a3b_h = loc_3bh + st->var_0B4;
1410+
a3b_bias = (a3b_h << 8) | a3b_l;
1411+
1412+
*a3b = a3b_bias - ((intptr_t)vec_map & 0xFFFF);
13491413
}
13501414

13511415
void gpoly_stb_drw_decr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13521416
{
1417+
#if 0
13531418
int loc2d, loc4c;
13541419
ubyte *loc3b;
13551420

@@ -1364,6 +1429,28 @@ void gpoly_stb_drw_decr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13641429
*a2d = loc2d;
13651430
*a3b = loc3b - vec_map;
13661431
*a4c = loc4c;
1432+
#endif
1433+
ubyte a3b_h, a3b_l;
1434+
int a3b_bias;
1435+
ubyte loc_carry;
1436+
int loc_2d, loc_3bh;
1437+
1438+
a3b_bias = ((intptr_t)vec_map & 0xFFFF) + *a3b;
1439+
1440+
a3b_l = a3b_bias;
1441+
1442+
loc_carry = __CFSUBL__(*a4c, st->var_0BC);
1443+
*a4c = *a4c - st->var_0BC;
1444+
loc_2d = *a2d - loc_carry;
1445+
1446+
loc_carry = __CFSUBL__(loc_2d, st->var_0B8);
1447+
*a2d = loc_2d - st->var_0B8;
1448+
loc_3bh = ((uint)a3b_bias >> 8) - loc_carry;
1449+
1450+
a3b_h = loc_3bh - st->var_0B4;
1451+
a3b_bias = (a3b_h << 8) | a3b_l;
1452+
1453+
*a3b = a3b_bias - ((intptr_t)vec_map & 0xFFFF);
13671454
}
13681455

13691456
void gpoly_stb_md05uni_var040_nz(struct gpoly_state *st)

0 commit comments

Comments
 (0)