@@ -480,22 +480,54 @@ static inline ubyte __CFADDL__(long x, long y)
480480 return (ulong )(x ) > (ulong )(x + y );
481481}
482482
483+ /**
484+ * whether the subtraction (x-y) of two byte ints would use carry
485+ */
486+ static inline ubyte __CFSUBB__ (sbyte x , sbyte y )
487+ {
488+ return (ubyte )(x ) < (ubyte )(y );
489+ }
490+
491+ /**
492+ * whether the subtraction (x-y) of two long ints would use carry
493+ */
494+ static inline ubyte __CFSUBL__ (long x , long y )
495+ {
496+ return (ulong )(x ) < (ulong )(y );
497+ }
498+
/**
 * Signed 32x32 multiply returning a 32-bit window of the 64-bit product.
 *
 * The instruction sequence doubles the 64-bit product (shl/rcl pair) and
 * then assembles its bits 16..47 into EAX, which is the same as taking
 * bits 15..46 of the plain product a1 * a2. The final conditional
 * increment tests the sign flag left by "shl %eax" (rcl, mov and rol do
 * not modify SF), so 1 is added when bit 30 of the low product word is set.
 *
 * @param a1  first factor (loaded into EAX)
 * @param a2  second factor (loaded into EDX)
 * @return    the value left in EAX by the sequence
 */
int gpoly_mul_rot_1(int a1, int a2)
{
    int ret;
    int hi = a2; /* EDX is overwritten by imul/rcl, so it must be read-write */

    __asm__ volatile (
      "imul   %%edx\n"
      "shl    %%eax\n"
      "rcl    %%edx\n"
      "mov    %%dx,%%ax\n"
      "rol    $0x10,%%eax\n"
      /* numeric local label: safe if the compiler emits this asm twice */
      "jns    1f\n"
      "inc    %%eax\n"
      "1:\n"
        /* result is produced in EAX, so the output has to be pinned to it */
        : "=a" (ret), "+d" (hi)
        : "0" (a1)
        : "cc");
    return ret;
}
491516
/**
 * Signed 32x32 multiply returning bits 16..47 of the 64-bit product.
 *
 * After the multiply, the low half of EDX is copied into AX and EAX is
 * rotated by 16, which leaves bits 16..47 of a1 * a2 in EAX. The sequence
 * then conditionally adds 1 depending on the sign flag.
 *
 * NOTE(review): no instruction between "imul" and "jns" modifies SF, and
 * the Intel SDM lists SF as undefined after the one-operand IMUL. The
 * increment therefore depends on CPU behaviour - confirm against the
 * intended rounding before relying on the lowest result bit.
 *
 * @param a1  first factor (loaded into EAX)
 * @param a2  second factor (loaded into EDX)
 * @return    the value left in EAX by the sequence
 */
int gpoly_mul_rot_2(int a1, int a2)
{
    int ret;
    int hi = a2; /* EDX is overwritten by imul, so it must be read-write */

    __asm__ volatile (
      "imul   %%edx\n"
      "mov    %%dx,%%ax\n"
      "rol    $0x10,%%eax\n"
      /* numeric local label: safe if the compiler emits this asm twice */
      "jns    1f\n"
      "inc    %%eax\n"
      "1:\n"
        /* result is produced in EAX, so the output has to be pinned to it */
        : "=a" (ret), "+d" (hi)
        : "0" (a1)
        : "cc");
    return ret;
}
500532
501533void gpoly_sta_md03 (struct gpoly_state * st )
@@ -1203,6 +1235,7 @@ void gpoly_sta_md28(struct gpoly_state *st)
12031235
12041236int gpoly_stb_drw_pixel (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
12051237{
1238+ #if 1
12061239 int ret ;
12071240 int loc2d , loc4c ;
12081241 ubyte * loc3b ;
@@ -1222,10 +1255,12 @@ int gpoly_stb_drw_pixel(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12221255 * a3b = loc3b - vec_map ;
12231256 * a4c = loc4c ;
12241257 return ret & 0xFFFF ;
1258+ #endif
12251259}
12261260
12271261int gpoly_stb_drw_pixel2 (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
12281262{
1263+ #if 1
12291264 int ret ;
12301265 int loc2d , loc4c ;
12311266 ubyte * loc3b ;
@@ -1244,10 +1279,12 @@ int gpoly_stb_drw_pixel2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12441279 * a3b = loc3b - vec_map ;
12451280 * a4c = loc4c ;
12461281 return ret & 0xFF ;
1282+ #endif
12471283}
12481284
12491285void gpoly_stb_drw_incr1a (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
12501286{
1287+ #if 1
12511288 int loc2d , loc4c ;
12521289 ubyte * loc3b ;
12531290
@@ -1263,10 +1300,12 @@ void gpoly_stb_drw_incr1a(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12631300 * a2d = loc2d ;
12641301 * a3b = loc3b - vec_map ;
12651302 * a4c = loc4c ;
1303+ #endif
12661304}
12671305
12681306void gpoly_stb_drw_incr1b (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
12691307{
1308+ #if 1
12701309 int loc2d , loc4c ;
12711310 ubyte * loc3b ;
12721311
@@ -1281,10 +1320,12 @@ void gpoly_stb_drw_incr1b(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
12811320 * a2d = loc2d ;
12821321 * a3b = loc3b - vec_map ;
12831322 * a4c = loc4c ;
1323+ #endif
12841324}
12851325
12861326void gpoly_stb_drw_incr2 (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
12871327{
1328+ #if 1
12881329 int loc2d , loc4c ;
12891330 ubyte * loc3b ;
12901331
@@ -1300,38 +1341,39 @@ void gpoly_stb_drw_incr2(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13001341 * a2d = loc2d ;
13011342 * a3b = loc3b - vec_map ;
13021343 * a4c = loc4c ;
1344+ #endif
13031345}
13041346
13051347void gpoly_stb_drw_incr3 (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
13061348{
1307- int loc2d , loc4c ;
1308- ubyte * loc3b ;
1309- ubyte loc_0E4l , loc_0E4h ;
1310- ubyte v25 ;
1311- uint v29 , v30 ;
1349+ int a3b_bias ;
1350+ ubyte a3b_l , a3b_h ;
1351+ ubyte loc_carry ;
1352+ int loc_4c , loc_2d , loc_3bh ;
13121353
1313- loc2d = * a2d ;
1314- loc3b = vec_map + * a3b ;
1315- loc4c = * a4c ;
1354+ a3b_bias = ((intptr_t )vec_map & 0xFFFF ) + * a3b ;
13161355
1317- v25 = ((uint )loc3b & 0xFF ) < (st -> var_0A8 & 0xFF );
1318- loc_0E4l = ((uint )loc3b & 0xFF ) - (st -> var_0A8 & 0xFF );
1319- v29 = v25 + st -> var_0BC ;
1320- v25 = (uint )loc4c < v29 ;
1321- loc4c = loc4c - v29 ;
1322- v30 = v25 + st -> var_0B8 ;
1323- v25 = (uint )loc2d < v30 ;
1324- loc2d = loc2d - v30 ;
1325- loc_0E4h = (((uint )loc3b >> 8 ) & 0xFF ) - (v25 + (st -> var_0B4 & 0xFF ));
1326- loc3b = (ubyte * )(((uint )vec_map & ~0xFFFF ) | loc_0E4l | (loc_0E4h << 8 ));
1356+ loc_carry = __CFSUBB__ (a3b_bias , st -> var_0A8 );
1357+ a3b_l = a3b_bias - st -> var_0A8 ;
1358+ loc_4c = * a4c - loc_carry ;
13271359
1328- * a2d = loc2d ;
1329- * a3b = loc3b - vec_map ;
1330- * a4c = loc4c ;
1360+ loc_carry = __CFSUBL__ (loc_4c , st -> var_0BC );
1361+ * a4c = loc_4c - st -> var_0BC ;
1362+ loc_2d = * a2d - loc_carry ;
1363+
1364+ loc_carry = __CFSUBL__ (loc_2d , st -> var_0B8 );
1365+ * a2d = loc_2d - st -> var_0B8 ;
1366+ loc_3bh = ((uint )a3b_bias >> 8 ) - loc_carry ;
1367+
1368+ a3b_h = loc_3bh - st -> var_0B4 ;
1369+ a3b_bias = (a3b_h << 8 ) | a3b_l ;
1370+
1371+ * a3b = a3b_bias - ((intptr_t )vec_map & 0xFFFF );
13311372}
13321373
13331374void gpoly_stb_drw_incr4 (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
13341375{
1376+ #if 0
13351377 int loc2d , loc4c ;
13361378 ubyte * loc3b ;
13371379
@@ -1346,10 +1388,33 @@ void gpoly_stb_drw_incr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13461388 * a2d = loc2d ;
13471389 * a3b = loc3b - vec_map ;
13481390 * a4c = loc4c ;
1391+ #endif
1392+ ubyte a3b_h , a3b_l ;
1393+ int a3b_bias ;
1394+ ubyte loc_carry ;
1395+ int loc_2d , loc_3bh ;
1396+
1397+ a3b_bias = ((intptr_t )vec_map & 0xFFFF ) + * a3b ;
1398+
1399+ a3b_l = a3b_bias ;
1400+
1401+ loc_carry = __CFADDL__ (* a4c , st -> var_0BC );
1402+ * a4c = * a4c + st -> var_0BC ;
1403+ loc_2d = * a2d + loc_carry ;
1404+
1405+ loc_carry = __CFADDL__ (loc_2d , st -> var_0B8 );
1406+ * a2d = loc_2d + st -> var_0B8 ;
1407+ loc_3bh = ((uint )a3b_bias >> 8 ) + loc_carry ;
1408+
1409+ a3b_h = loc_3bh + st -> var_0B4 ;
1410+ a3b_bias = (a3b_h << 8 ) | a3b_l ;
1411+
1412+ * a3b = a3b_bias - ((intptr_t )vec_map & 0xFFFF );
13491413}
13501414
13511415void gpoly_stb_drw_decr4 (int * a2d , int * a3b , int * a4c , struct gpoly_state * st )
13521416{
1417+ #if 0
13531418 int loc2d , loc4c ;
13541419 ubyte * loc3b ;
13551420
@@ -1364,6 +1429,28 @@ void gpoly_stb_drw_decr4(int *a2d, int *a3b, int *a4c, struct gpoly_state *st)
13641429 * a2d = loc2d ;
13651430 * a3b = loc3b - vec_map ;
13661431 * a4c = loc4c ;
1432+ #endif
1433+ ubyte a3b_h , a3b_l ;
1434+ int a3b_bias ;
1435+ ubyte loc_carry ;
1436+ int loc_2d , loc_3bh ;
1437+
1438+ a3b_bias = ((intptr_t )vec_map & 0xFFFF ) + * a3b ;
1439+
1440+ a3b_l = a3b_bias ;
1441+
1442+ loc_carry = __CFSUBL__ (* a4c , st -> var_0BC );
1443+ * a4c = * a4c - st -> var_0BC ;
1444+ loc_2d = * a2d - loc_carry ;
1445+
1446+ loc_carry = __CFSUBL__ (loc_2d , st -> var_0B8 );
1447+ * a2d = loc_2d - st -> var_0B8 ;
1448+ loc_3bh = ((uint )a3b_bias >> 8 ) - loc_carry ;
1449+
1450+ a3b_h = loc_3bh - st -> var_0B4 ;
1451+ a3b_bias = (a3b_h << 8 ) | a3b_l ;
1452+
1453+ * a3b = a3b_bias - ((intptr_t )vec_map & 0xFFFF );
13671454}
13681455
13691456void gpoly_stb_md05uni_var040_nz (struct gpoly_state * st )
0 commit comments