@@ -35,17 +35,15 @@ typedef union {
35
35
#define CONST_PI 3.14159265358979323846
36
36
#define CONST_SQRT2 1.41421356237309504880
37
37
38
- #define MINFORMAT 0
39
- #define MAXFORMAT 2
40
38
#define MINMODE (-1) /* lowest mode index passed to the test selectors; parenthesized so the expansion is one operand */
41
39
#define MAXMODE 8
42
40
#define NREPS 1000
43
41
44
42
/* Define default target formats. */
45
- // binary16 , bfloat16, TensorFloat-32
46
- static size_t precision [ ] = { 11 , 8 , 11 } ;
47
- static size_t emax [ ] = { 15 , 127 , 127 } ;
48
- static size_t emin [ ] = { - 14 , - 126 , - 126 } ;
43
+ // E5M2 , bfloat16
44
+ static size_t precision [ ] = { 3 , 8 } ;
45
+ static size_t emax [ ] = { 15 , 127 } ;
46
+ static size_t emin [ ] = { - 14 , - 126 } ; /* NOTE(review): size_t is unsigned, so these negative initializers wrap to very large values; they only become -14 / -126 again when converted to a signed type. Confirm every use does that, or store the minimum exponents in a signed type. */
49
47
static size_t nformats = 2 ;
50
48
51
49
/* Structure for options and fixtures. */
@@ -324,7 +322,7 @@ void check_equality_double(double *x, double *y, size_t n) {
324
322
for ( size_t j = 0 ; j < n ; j ++ ) {
325
323
if ( ! nan_safe_compare_double ( x [ j ] , y [ j ] ) ) {
326
324
printf ( "DOUBLE\n" ) ;
327
- printf ( "***\nj = %ld\nx = %23.15e [%" PRIu64 "]\ny = %23.15e [%" PRIu64 "]\n" ,
325
+ printf ( "***\nj = %zu\nx = %23.15e [%" PRIu64 "]\ny = %23.15e [%" PRIu64 "]\n" , /* %zu because the index j is a size_t; %ld expects a long */
328
326
j ,
329
327
x [ j ] , * ( uint64_t * ) ( x + j ) ,
330
328
y [ j ] , * ( uint64_t * ) ( y + j ) ) ;
@@ -338,7 +336,7 @@ void check_equality_double_int(double *x, int *y, size_t n) {
338
336
for ( size_t j = 0 ; j < n ; j ++ ) {
339
337
if ( ! nan_safe_compare_double ( x [ j ] , y [ j ] ) ) {
340
338
printf ( "DOUBLE\n" ) ;
341
- printf ( "***\nj = %ld\nx = %23.15e [%" PRIu64 "]\ny = %23.15e\n" ,
339
+ printf ( "***\nj = %zu\nx = %23.15e [%" PRIu64 "]\ny = %23.15e\n" , /* %zu because the index j is a size_t; %ld expects a long */
342
340
j , x [ j ] , * ( uint64_t * ) ( x + j ) , ( double ) y [ j ] ) ;
343
341
}
344
342
ck_assert ( nan_safe_compare_double ( x [ j ] , ( double ) y [ j ] ) ) ;
@@ -916,15 +914,15 @@ for (size_t mode = 1; mode < 3; mode++) {
916
914
double * zd = alloc_init_array_double ( xd , n ) ;
917
915
double * yd = allocate_array_double ( xd , n , mode ) ;
918
916
select_tests_det_double ( yd , xd , zd , n , fpopts ,
919
- MINMODE , MAXMODE , 0 , 2 , - 1 , - 1 ) ;
917
+ MINMODE , MAXMODE , 0 , nformats - 1 , - 1 , - 1 ) ;
920
918
free ( zd ) ;
921
919
free_array_double ( yd , mode ) ;
922
920
923
921
float xf [ ] = { 0 , - 0 , inf_float ( ) , - inf_float ( ) , nan_float ( ) } ;
924
922
float * zf = alloc_init_array_float ( xf , n ) ;
925
923
float * yf = allocate_array_float ( xf , n , mode ) ;
926
924
select_tests_det_float ( yf , xf , zf , n , fpopts ,
927
- MINMODE , MAXMODE , 0 , 2 , - 1 , - 1 ) ;
925
+ MINMODE , MAXMODE , 0 , nformats - 1 , - 1 , - 1 ) ;
928
926
free ( zf ) ;
929
927
free_array_float ( yf , mode ) ;
930
928
}
@@ -2624,13 +2622,16 @@ free(onef);
2624
2622
free ( exp ) ;
2625
2623
free ( lexp ) ;
2626
2624
2627
- #test enumeration_floating_point_numbers
2628
- printf ( "4 c . Next and previous floating - point number\n") ;
2625
+ #test floating_point_enumeration_subnormal_numbers
2626
+ printf ( "4 c . Next and previous floating - point number: subnormal numbers \n") ;
2627
+ fpopts - > infinity = CPFLOAT_INF_USE ;
2629
2628
fpopts - > explim = CPFLOAT_EXPRANGE_TARG ;
2629
+ fpopts - > saturation = CPFLOAT_SAT_NO ;
2630
2630
fpopts - > subnormal = CPFLOAT_SUBN_USE ;
2631
- // Subnormals
2632
- for ( size_t mode = 2 ; mode < 3 ; mode ++ ) {
2631
+ fpopts - > round = CPFLOAT_RND_NE ;
2632
+ for ( size_t mode = 2 ; mode < 3 ; mode ++ ) { /* start at 2: with mode = 3 the condition mode < 3 is false on entry and the subnormal checks below never run */
2633
2633
for ( size_t i = 0 ; i < nformats ; i ++ ) {
2634
+
2634
2635
fpopts - > precision = precision [ i ] ;
2635
2636
fpopts - > emax = emax [ i ] ;
2636
2637
fpopts - > emin = emin [ i ] ;
@@ -2653,8 +2654,10 @@ for (size_t mode = 2; mode < 3; mode++) {
2653
2654
copy_array_double ( ad , refd , n ) ;
2654
2655
cpf_nexttoward ( xd , ad , infl , n - 1 , fpopts ) ;
2655
2656
check_equality_double ( xd , refd + 1 , n - 1 ) ;
2657
+
2656
2658
csign_intarray_double ( ( uint64_t * ) infd , n ) ;
2657
- for ( size_t j = 0 ; j < n ; j ++ ) infl [ j ] = - infl [ j ] ;
2659
+ for ( size_t j = 0 ; j < n ; j ++ )
2660
+ infl [ j ] = - infl [ j ] ;
2658
2661
copy_array_double ( ad , refd , n ) ;
2659
2662
cpf_nextafter ( xd , ad + 1 , infd , n - 1 , fpopts ) ;
2660
2663
check_equality_double ( xd , refd , n - 1 ) ;
@@ -2669,8 +2672,10 @@ for (size_t mode = 2; mode < 3; mode++) {
2669
2672
copy_array_double ( ad , refd , n ) ;
2670
2673
cpf_nexttoward ( xd , ad + 1 , infl , n - 1 , fpopts ) ;
2671
2674
check_equality_double ( xd , refd , n - 1 ) ;
2675
+
2672
2676
csign_intarray_double ( ( uint64_t * ) infd , n ) ;
2673
- for ( size_t j = 0 ; j < n ; j ++ ) infl [ j ] = - infl [ j ] ;
2677
+ for ( size_t j = 0 ; j < n ; j ++ )
2678
+ infl [ j ] = - infl [ j ] ;
2674
2679
copy_array_double ( ad , refd , n ) ;
2675
2680
cpf_nextafter ( xd , ad , infd , n - 1 , fpopts ) ;
2676
2681
check_equality_double ( xd , refd + 1 , n - 1 ) ;
@@ -2729,8 +2734,9 @@ for (size_t mode = 2; mode < 3; mode++) {
2729
2734
}
2730
2735
}
2731
2736
2732
- // Normals
2733
- for ( size_t mode = 2 ; mode < 3 ; mode ++ ) {
2737
+ #test floating_point_enumeration_normal_numbers
2738
+ printf ( "4d. Next and previous floating-point number: normal numbers\n" ) ;
2739
+ for ( size_t mode = 2 ; mode < 3 ; mode ++ ) { /* start at 2: with mode = 3 the condition mode < 3 is false on entry and the normal-number checks below never run */
2734
2740
for ( size_t i = 0 ; i < nformats ; i ++ ) {
2735
2741
fpopts - > precision = precision [ i ] ;
2736
2742
fpopts - > emax = emax [ i ] ;
@@ -2839,12 +2845,10 @@ for (size_t mode = 2; mode < 3; mode++) {
2839
2845
}
2840
2846
2841
2847
#test integer_rounding
2842
- printf ( "4c. Integer rounding\n" ) ;
2843
-
2848
+ printf ( "4e. Integer rounding\n" ) ;
2844
2849
fpopts - > emax = emax [ 1 ] ;
2845
2850
fpopts - > emin = emin [ 1 ] ;
2846
2851
fpopts - > precision = precision [ 1 ] ;
2847
-
2848
2852
size_t n = 32 ;
2849
2853
double * xd = malloc ( n * sizeof ( * xd ) ) ;
2850
2854
int * xi = malloc ( n * sizeof ( * xi ) ) ;
@@ -3012,7 +3016,7 @@ check_equality_double_long_long(rd7, xll, n-3);
3012
3016
3013
3017
3014
3018
#test arithmetic_operations_large
3015
- printf ( "4d . Arithmetic operations on large arrays\n" ) ;
3019
+ printf ( "4f . Arithmetic operations on large arrays\n" ) ;
3016
3020
size_t i = 0 ;
3017
3021
fpopts - > precision = precision [ i ] ;
3018
3022
fpopts - > emax = emax [ i ] ;
0 commit comments