Skip to content

Commit 32365b3

Browse files
fgvanzee authored and dzambare committed
Ensure random objects' 1-norms are non-zero.
Details:
- Fixed an innocuous bug that manifested when running the testsuite on extremely small matrices with randomization via the "powers of 2 in narrow precision range" option enabled. When the randomization function emits a perfect 0.0 to fill a 1x1 matrix, the testsuite will then compute 0.0/0.0 during the normalization process, which leads to NaN residuals. The solution entails smarter implementations of randv, randnv, randm, and randnm, each of which will compute the 1-norm of the vector or matrix in question. If the object has a 1-norm of 0.0, the object is re-randomized until the 1-norm is not 0.0. Thanks to Kiran Varaganti for reporting this issue (flame#413).
- Updated the implementation of randm_unb_var1() so that it loops over a call to the randv_unb_var1() implementation directly rather than calling it indirectly via randv(). This was done to avoid the overhead of multiple calls to norm1v() when randomizing the rows/columns of a matrix.
- Updated comments.

Change-Id: I0e3d65ff97b26afde614da746e17ed33646839d1
1 parent ccf0772 commit 32365b3

3 files changed

Lines changed: 100 additions & 43 deletions

File tree

frame/include/level0/bli_randnp2s.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
{ \
4343
bli_drandnp2s( a ); \
4444
}
45+
46+
#if 0
4547
#define bli_drandnp2s_prev( a ) \
4648
{ \
4749
const double m_max = 3.0; \
@@ -95,6 +97,8 @@
9597
down to float. */ \
9698
a = r_val; \
9799
}
100+
#endif
101+
98102
#define bli_drandnp2s( a ) \
99103
{ \
100104
const double m_max = 6.0; \
@@ -108,15 +112,19 @@
108112
represents the largest power of two we will use to generate the
109113
random numbers. */ \
110114
\
111-
/* Generate a random real number t on the interval: [0.0, 6.0]. */ \
112-
t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \
113-
\
114-
/* Modify t to guarantee that is never equal to the upper bound of
115-
the interval (in this case, 6.0). */ \
116-
if ( t == m_max2 ) t = t - 1.0; \
115+
do \
116+
{ \
117+
/* Generate a random real number t on the interval: [0.0, 6.0]. */ \
118+
t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \
117119
\
118-
/* Transform the interval into the set of integers, {0,1,2,3,4,5}. */ \
119-
t = floor( t ); \
120+
/* Transform the interval into the set of integers, {0,1,2,3,4,5}.
121+
Note that 6 is prohibited by the loop guard below. */ \
122+
t = floor( t ); \
123+
} \
124+
/* If t is ever equal to m_max2, we re-randomize. The guard against
125+
m_max2 < t is for sanity and shouldn't happen, unless perhaps there
126+
is weirdness in the typecasting to double when computing t above. */ \
127+
while ( m_max2 <= t ); \
120128
\
121129
/* Map values of t == 0 to a final value of 0. */ \
122130
if ( t == 0.0 ) r_val = 0.0; \
@@ -126,7 +134,7 @@
126134
\
127135
double s_val; \
128136
\
129-
/* Compute r_val = 2^s where s = +/-(t-1) = {-4,-3,-2,-1,0}. */ \
137+
/* Compute r_val = 2^s where s = -(t-1) = {-4,-3,-2,-1,0}. */ \
130138
r_val = pow( 2.0, -(t - 1.0) ); \
131139
\
132140
/* Compute a random number to determine the sign of the final

frame/util/bli_util_tapi.c

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
271271
INSERT_GENTFUNC_BASIC_I( printm, fprintm )
272272

273273

274-
#undef GENTFUNC
275-
#define GENTFUNC( ctype, ch, opname ) \
274+
#undef GENTFUNCR
275+
#define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \
276276
\
277277
void PASTEMAC2(ch,opname,EX_SUF) \
278278
( \
@@ -291,23 +291,44 @@ void PASTEMAC2(ch,opname,EX_SUF) \
291291
/* Obtain a valid context from the gks if necessary. */ \
292292
/*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \
293293
\
294-
/* Invoke the helper variant, which loops over the appropriate kernel
295-
to implement the current operation. */ \
296-
PASTEMAC2(ch,opname,_unb_var1) \
297-
( \
298-
n, \
299-
x, incx, \
300-
cntx, \
301-
rntm \
302-
); \
294+
ctype_r norm; \
295+
\
296+
/* Set the norm to zero. */ \
297+
PASTEMAC(chr,set0s)( norm ); \
298+
\
299+
/* Iterate at least once, but continue iterating until the norm is not zero. */ \
300+
while ( PASTEMAC(chr,eq0)( norm ) ) \
301+
{ \
302+
/* Invoke the helper variant, which loops over the appropriate kernel
303+
to implement the current operation. */ \
304+
PASTEMAC2(ch,opname,_unb_var1) \
305+
( \
306+
n, \
307+
x, incx, \
308+
cntx, \
309+
rntm \
310+
); \
311+
\
312+
/* Check the 1-norm of the randomized vector. In the unlikely event that
313+
the 1-norm is zero, it means that *all* elements are zero, in which
314+
case we want to re-randomize until the 1-norm is not zero. */ \
315+
PASTEMAC2(ch,norm1v,BLIS_TAPI_EX_SUF) \
316+
( \
317+
n, \
318+
x, incx, \
319+
&norm, \
320+
cntx, \
321+
rntm \
322+
); \
323+
} \
303324
}
304325

305-
INSERT_GENTFUNC_BASIC0( randv )
306-
INSERT_GENTFUNC_BASIC0( randnv )
326+
INSERT_GENTFUNCR_BASIC0( randv )
327+
INSERT_GENTFUNCR_BASIC0( randnv )
307328

308329

309-
#undef GENTFUNC
310-
#define GENTFUNC( ctype, ch, opname ) \
330+
#undef GENTFUNCR
331+
#define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \
311332
\
312333
void PASTEMAC2(ch,opname,EX_SUF) \
313334
( \
@@ -329,22 +350,47 @@ void PASTEMAC2(ch,opname,EX_SUF) \
329350
/* Obtain a valid context from the gks if necessary. */ \
330351
/*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \
331352
\
332-
/* Invoke the helper variant, which loops over the appropriate kernel
333-
to implement the current operation. */ \
334-
PASTEMAC2(ch,opname,_unb_var1) \
335-
( \
336-
diagoffx, \
337-
uplox, \
338-
m, \
339-
n, \
340-
x, rs_x, cs_x, \
341-
cntx, \
342-
rntm \
343-
); \
353+
ctype_r norm; \
354+
\
355+
/* Set the norm to zero. */ \
356+
PASTEMAC(chr,set0s)( norm ); \
357+
\
358+
/* Iterate at least once, but continue iterating until the norm is not zero. */ \
359+
while ( PASTEMAC(chr,eq0)( norm ) ) \
360+
{ \
361+
/* Invoke the helper variant, which loops over the appropriate kernel
362+
to implement the current operation. */ \
363+
PASTEMAC2(ch,opname,_unb_var1) \
364+
( \
365+
diagoffx, \
366+
uplox, \
367+
m, \
368+
n, \
369+
x, rs_x, cs_x, \
370+
cntx, \
371+
rntm \
372+
); \
373+
\
374+
/* Check the 1-norm of the randomized matrix. In the unlikely event that
375+
the 1-norm is zero, it means that *all* elements are zero, in which
376+
case we want to re-randomize until the 1-norm is not zero. */ \
377+
PASTEMAC2(ch,norm1m,BLIS_TAPI_EX_SUF) \
378+
( \
379+
diagoffx, \
380+
BLIS_NONUNIT_DIAG, \
381+
uplox, \
382+
m, \
383+
n, \
384+
x, rs_x, cs_x, \
385+
&norm, \
386+
cntx, \
387+
rntm \
388+
); \
389+
} \
344390
}
345391

346-
INSERT_GENTFUNC_BASIC0( randm )
347-
INSERT_GENTFUNC_BASIC0( randnm )
392+
INSERT_GENTFUNCR_BASIC0( randm )
393+
INSERT_GENTFUNCR_BASIC0( randnm )
348394

349395

350396
#undef GENTFUNCR

frame/util/bli_util_unb_var1.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,8 @@ void PASTEMAC(ch,varname) \
10191019
\
10201020
x1 = x + (j )*ldx + (0 )*incx; \
10211021
\
1022-
PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \
1022+
/*PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF)*/ \
1023+
PASTEMAC(ch,kername) \
10231024
( \
10241025
n_elem, \
10251026
x1, incx, \
@@ -1046,7 +1047,8 @@ void PASTEMAC(ch,varname) \
10461047
x0 = x1; \
10471048
chi1 = x1 + (n_elem-1)*incx; \
10481049
\
1049-
PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \
1050+
/*PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF)*/ \
1051+
PASTEMAC(ch,kername) \
10501052
( \
10511053
n_elem, \
10521054
x1, incx, \
@@ -1086,7 +1088,8 @@ void PASTEMAC(ch,varname) \
10861088
x2 = x1 + incx; \
10871089
chi1 = x1; \
10881090
\
1089-
PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \
1091+
/*PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF)*/ \
1092+
PASTEMAC(ch,kername) \
10901093
( \
10911094
n_elem, \
10921095
x1, incx, \
@@ -1118,8 +1121,8 @@ void PASTEMAC(ch,varname) \
11181121
} \
11191122
}
11201123

1121-
INSERT_GENTFUNC_BASIC( randm_unb_var1, randv )
1122-
INSERT_GENTFUNC_BASIC( randnm_unb_var1, randnv )
1124+
INSERT_GENTFUNC_BASIC( randm_unb_var1, randv_unb_var1 )
1125+
INSERT_GENTFUNC_BASIC( randnm_unb_var1, randnv_unb_var1 )
11231126

11241127

11251128
#undef GENTFUNCR

0 commit comments

Comments
 (0)