Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions build/bli_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@
#endif
#endif

#if @harden_barriers@
#define BLIS_HARDEN_BARRIERS
#endif

#if @enable_jrir_slab@
#define BLIS_ENABLE_JRIR_SLAB
#endif
Expand Down
2 changes: 0 additions & 2 deletions build/libblis-symbols.def
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,6 @@ bli_szcopysc
bli_szipsc
bli_szxpbym_md
bli_szxpbym_md_ex
bli_thrcomm_barrier
bli_thrcomm_bcast
bli_thread_get_ic_nt
bli_thread_get_ir_nt
bli_thread_get_jc_nt
Expand Down
24 changes: 24 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,15 @@ print_usage()
--disable-threading if you suspect any correctness or
deadlock issues.

--harden-barriers

Keep track of additional information which enables
run-time detection of various issues due to either misuse
of BLIS thrinfo_t structures (esp. via bli_thrinfo_barrier
and bli_thrinfo_bcast) or potentially to bugs in the
BLIS threading code itself. There may be a performance
penalty.

--disable-pba-pools, --enable-pba-pools
--disable-sba-pools, --enable-sba-pools

Expand Down Expand Up @@ -3027,6 +3036,9 @@ blis_main()
# The thread-local storage flag.
enable_tls='yes'

# Barrier hardening flag.
harden_barriers='no'

# The threading flag.
threading_model='off'

Expand Down Expand Up @@ -3228,6 +3240,10 @@ blis_main()
enable_tls='no'
;;

harden-barriers)
harden_barriers='yes'
;;

enable-threading=*)
threading_model=${OPTARG#*=}
;;
Expand Down Expand Up @@ -3803,6 +3819,13 @@ blis_main()
enable_tls_01=0
fi

# Check for barrier hardening.
harden_barriers_01=0
if [[ ${harden_barriers} = yes ]]; then
echo "${script_name}: Barriers will be hardened. There may be some performance impact."
harden_barriers_01=1
fi

# Check the threading model flag and standardize its value, if needed.
# Note that single-threaded mode will always be enabled, but not necessarily
# by default.
Expand Down Expand Up @@ -4456,6 +4479,7 @@ blis_main()
add_config_var kernel_list_defines
add_config_var omit_symbol_list_defines
add_config_var enable_tls enable_tls_01
add_config_var harden_barriers harden_barriers_01
add_config_var enable_openmp enable_openmp_01
add_config_var enable_openmp_as_def enable_openmp_as_def_01
add_config_var enable_pthreads enable_pthreads_01
Expand Down
44 changes: 36 additions & 8 deletions frame/thread/bli_thrcomm.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void bli_thrcomm_cleanup( thrcomm_t* comm )
fp( comm );
}

void bli_thrcomm_barrier( dim_t tid, thrcomm_t* comm )
void bli_thrcomm_barrier( dim_t tid, thrcomm_t* comm, const char* tag )
{
const timpl_t ti = bli_thrcomm_thread_impl( comm );
const thrcomm_barrier_ft fp = barrier_fpa[ ti ];
Expand All @@ -182,25 +182,26 @@ void bli_thrcomm_barrier( dim_t tid, thrcomm_t* comm )
if ( fp == NULL ) bli_abort();

// Call the threading-specific barrier function.
fp( tid, comm );
fp( tid, comm, tag );
}

// -- Other functions ----------------------------------------------------------

void* bli_thrcomm_bcast
(
dim_t id,
void* to_send,
thrcomm_t* comm
dim_t id,
void* to_send,
thrcomm_t* comm,
const char* tag
)
{
if ( comm == NULL || comm->n_threads == 1 ) return to_send;

if ( id == 0 ) comm->sent_object = to_send;

bli_thrcomm_barrier( id, comm );
bli_thrcomm_barrier( id, comm, tag );
void* object = comm->sent_object;
bli_thrcomm_barrier( id, comm );
bli_thrcomm_barrier( id, comm, tag );

return object;
}
Expand All @@ -222,7 +223,7 @@ void* bli_thrcomm_bcast

#endif

void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm, const char* tag )
{
// Return early if the comm is NULL or if there is only one
// thread participating.
Expand All @@ -238,6 +239,11 @@ void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm )
// decremented back to 0, and so forth).
gint_t orig_sense = __atomic_load_n( &comm->barrier_sense, __ATOMIC_RELAXED );

#ifdef BLIS_HARDEN_BARRIERS
comm->status[ t_id ].barrier_sense = orig_sense;
comm->status[ t_id ].tag = tag;
#endif

// Register ourselves (the current thread) as having arrived by
// incrementing the barrier_threads_arrived variable. We must perform
// this increment (and a subsequent read) atomically.
Expand All @@ -248,13 +254,35 @@ void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm )
// it will take actions that effectively ends and resets the barrier.
if ( my_threads_arrived == comm->n_threads )
{
#ifdef BLIS_HARDEN_BARRIERS
// Check that all threads a) called bli_thrinfo_barrier or
// bli_thrinfo_bcast from the same source location, and b)
// encountered the same original sense variable.
for ( dim_t i = 0;i < comm->n_threads; i++ )
{
if ( comm->status[ i ].barrier_sense != orig_sense ||
comm->status[ i ].tag != tag )
{
printf( "Inconsistency detected in barrier:\n" );
for ( dim_t j = 0;j < comm->n_threads; j++ )
printf( "Thread %d detected sense %lld at %s\n",
(int)(j+1), (long long)comm->status[ j ].barrier_sense, comm->status[ j ].tag );
bli_abort();
}
}
#endif

// Reset the variable tracking the number of threads that have arrived
// to zero (which returns the barrier to the "empty" state). Then
// atomically change the barrier sense variable (toggling it or, when
// barriers are hardened, incrementing it). This will signal to the
// other threads (which are spinning in the branch below) that it is
// now safe to exit the barrier.
comm->barrier_threads_arrived = 0;
#ifdef BLIS_HARDEN_BARRIERS
__atomic_fetch_add( &comm->barrier_sense, 1, __ATOMIC_RELEASE );
#else
__atomic_fetch_xor( &comm->barrier_sense, 1, __ATOMIC_RELEASE );
#endif
}
else
{
Expand Down
29 changes: 19 additions & 10 deletions frame/thread/bli_thrcomm.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,22 @@ typedef struct hpx_barrier_t
// Define the thrcomm_t structure, which will be common to all threading
// implementations.

// Per-thread bookkeeping record used by the hardened barrier
// (BLIS_HARDEN_BARRIERS). Each thread fills in its own entry when it enters
// bli_thrcomm_barrier_atomic(), and the last thread to arrive compares all
// entries to detect threads that disagree about which barrier they are in.
typedef struct thrcomm_status_s
{
// The value of the comm's barrier_sense that this thread observed when it
// entered the barrier.
gint_t barrier_sense;
// The tag passed by this thread to the barrier. The consistency check
// compares tags by pointer value, not by string contents.
const char* tag;
// One cache line of padding; presumably intended to keep the entries of
// different threads from sharing a cache line (TODO confirm).
char padding[ BLIS_CACHE_LINE_SIZE ];
} thrcomm_status_t;

typedef struct thrcomm_s
{
// -- Fields common to all threading implementations --

void* sent_object;
dim_t n_threads;
timpl_t ti;
void* sent_object;
dim_t n_threads;
timpl_t ti;
#ifdef BLIS_HARDEN_BARRIERS
thrcomm_status_t* status;
#endif

// We insert a cache line of padding here to eliminate false sharing between
// the fields above and fields below.
Expand Down Expand Up @@ -155,7 +164,7 @@ typedef struct thrcomm_s
// "overloaded" by each method of multithreading.
typedef void (*thrcomm_init_ft)( dim_t nt, thrcomm_t* comm );
typedef void (*thrcomm_cleanup_ft)( thrcomm_t* comm );
typedef void (*thrcomm_barrier_ft)( dim_t tid, thrcomm_t* comm );
typedef void (*thrcomm_barrier_ft)( dim_t tid, thrcomm_t* comm, const char* tag );


// thrcomm_t query (field only)
Expand All @@ -180,13 +189,13 @@ void bli_thrcomm_free( pool_t* sba_pool, thrcomm_t* comm );
// require the timpl_t as an argument. The threading-specific functions can
// (and do) omit the timpl_t from their function signatures since their
// threading implementation is intrinsically known.
void bli_thrcomm_init( timpl_t ti, dim_t n_threads, thrcomm_t* comm );
void bli_thrcomm_cleanup( thrcomm_t* comm );
BLIS_EXPORT_BLIS void bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm );
void bli_thrcomm_init( timpl_t ti, dim_t n_threads, thrcomm_t* comm );
void bli_thrcomm_cleanup( thrcomm_t* comm );
void bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm, const char* tag );

// Other function prototypes.
BLIS_EXPORT_BLIS void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm );
void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm );
void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm, const char* tag );
void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm, const char* tag );

#endif

5 changes: 3 additions & 2 deletions frame/thread/bli_thrcomm_hpx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void hpx_barrier_destroy( hpx_barrier_t* barrier )
auto* barrier_ = reinterpret_cast<hpx::barrier<>*>( barrier->handle );
barrier->handle = nullptr;

delete barrier_;
delete barrier_;
}

void hpx_barrier_arrive_and_wait( hpx_barrier_t* barrier )
Expand Down Expand Up @@ -86,8 +86,9 @@ void bli_thrcomm_cleanup_hpx( thrcomm_t* comm )
hpx_barrier_destroy( &comm->barrier );
}

void bli_thrcomm_barrier_hpx( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_hpx( dim_t t_id, thrcomm_t* comm, const char* tag )
{
( void )tag;
hpx_barrier_arrive_and_wait( &comm->barrier );
}

Expand Down
2 changes: 1 addition & 1 deletion frame/thread/bli_thrcomm_hpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

void bli_thrcomm_init_hpx( dim_t nt, thrcomm_t* comm );
void bli_thrcomm_cleanup_hpx( thrcomm_t* comm );
void bli_thrcomm_barrier_hpx( dim_t tid, thrcomm_t* comm );
void bli_thrcomm_barrier_hpx( dim_t tid, thrcomm_t* comm, const char* tag );

#endif

Expand Down
25 changes: 18 additions & 7 deletions frame/thread/bli_thrcomm_openmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,26 @@ void bli_thrcomm_init_openmp( dim_t n_threads, thrcomm_t* comm )
comm->ti = BLIS_OPENMP;
comm->barrier_sense = 0;
comm->barrier_threads_arrived = 0;

#ifdef BLIS_HARDEN_BARRIERS
err_t r_val;
comm->status = ( thrcomm_status_t* )bli_malloc_intl( n_threads * sizeof( thrcomm_status_t ), &r_val );
#endif
}


// Release the resources held by an OpenMP thrcomm_t.
//
// When BLIS_HARDEN_BARRIERS is defined, this frees the per-thread status
// array that bli_thrcomm_init_openmp() allocated and clears the pointer so
// that a repeated cleanup (or any later access) cannot touch freed memory.
// When barrier hardening is disabled there is nothing to release and the
// function is a no-op.
//
// comm: the communicator to clean up; a NULL comm is ignored, matching
//       bli_thrcomm_cleanup_single().
void bli_thrcomm_cleanup_openmp( thrcomm_t* comm )
{
#ifdef BLIS_HARDEN_BARRIERS
	// Nothing to do if no communicator was given.
	if ( comm == NULL ) return;

	bli_free_intl( comm->status );

	// Do not leave a dangling pointer behind in the communicator.
	comm->status = NULL;
#endif
}

void bli_thrcomm_barrier_openmp( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_openmp( dim_t t_id, thrcomm_t* comm, const char* tag )
{
bli_thrcomm_barrier_atomic( t_id, comm );
bli_thrcomm_barrier_atomic( t_id, comm, tag );
}

#else
Expand Down Expand Up @@ -99,13 +108,13 @@ void bli_thrcomm_cleanup_openmp( thrcomm_t* comm )
bli_free_intl( comm->barriers );
}

void bli_thrcomm_barrier_openmp( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_openmp( dim_t t_id, thrcomm_t* comm, const char* tag )
{
// Return early if the comm is NULL or if there is only one
// thread participating.
if ( comm == NULL || comm->n_threads == 1 ) return;

bli_thrcomm_tree_barrier( comm->barriers[t_id] );
bli_thrcomm_tree_barrier( comm->barriers[t_id], tag );
}

// -- Helper functions ---------------------------------------------------------
Expand Down Expand Up @@ -146,10 +155,10 @@ barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_
kid->dad = me;

leaf_index += threads_this_kid;
}
}
me->count = arity;
me->arity = arity;
}
}

return me;
}
Expand Down Expand Up @@ -184,8 +193,10 @@ void bli_thrcomm_tree_barrier_free( barrier_t* barrier )

#endif

void bli_thrcomm_tree_barrier( barrier_t* barack )
void bli_thrcomm_tree_barrier( barrier_t* barack, const char* tag )
{
//TODO

gint_t my_signal = __atomic_load_n( &barack->signal, __ATOMIC_RELAXED );

dim_t my_count =
Expand Down
4 changes: 2 additions & 2 deletions frame/thread/bli_thrcomm_openmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
// OpenMP-specific function prototypes.
void bli_thrcomm_init_openmp( dim_t nt, thrcomm_t* comm );
void bli_thrcomm_cleanup_openmp( thrcomm_t* comm );
void bli_thrcomm_barrier_openmp( dim_t tid, thrcomm_t* comm );
void bli_thrcomm_barrier_openmp( dim_t tid, thrcomm_t* comm, const char* tag );

// Prototypes specific to the OpenMP tree barrier implementation.
#ifdef BLIS_TREE_BARRIER
barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index );
void bli_thrcomm_tree_barrier_free( barrier_t* barrier );
void bli_thrcomm_tree_barrier( barrier_t* barack );
void bli_thrcomm_tree_barrier( barrier_t* barack, const char* tag );
#endif

#endif
Expand Down
13 changes: 11 additions & 2 deletions frame/thread/bli_thrcomm_pthreads.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,25 @@ void bli_thrcomm_init_pthreads( dim_t n_threads, thrcomm_t* comm )
comm->ti = BLIS_POSIX;
comm->barrier_sense = 0;
comm->barrier_threads_arrived = 0;

#ifdef BLIS_HARDEN_BARRIERS
err_t r_val;
comm->status = ( thrcomm_status_t* )bli_malloc_intl( n_threads * sizeof( thrcomm_status_t ), &r_val );
#endif
}

// Release the resources held by a pthreads thrcomm_t.
//
// When BLIS_HARDEN_BARRIERS is defined, this frees the per-thread status
// array that bli_thrcomm_init_pthreads() allocated and clears the pointer so
// that a repeated cleanup (or any later access) cannot touch freed memory.
// When barrier hardening is disabled there is nothing to release and the
// function is a no-op.
//
// comm: the communicator to clean up; a NULL comm is ignored, matching
//       bli_thrcomm_cleanup_single().
void bli_thrcomm_cleanup_pthreads( thrcomm_t* comm )
{
#ifdef BLIS_HARDEN_BARRIERS
	// Nothing to do if no communicator was given.
	if ( comm == NULL ) return;

	bli_free_intl( comm->status );

	// Do not leave a dangling pointer behind in the communicator.
	comm->status = NULL;
#endif
}

void bli_thrcomm_barrier_pthreads( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_pthreads( dim_t t_id, thrcomm_t* comm, const char* tag )
{
bli_thrcomm_barrier_atomic( t_id, comm );
bli_thrcomm_barrier_atomic( t_id, comm, tag );
}

#endif
Expand Down
4 changes: 2 additions & 2 deletions frame/thread/bli_thrcomm_pthreads.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@
#define BLIS_THRCOMM_PTHREADS_H

// Define these prototypes for situations when POSIX multithreading is enabled.
#ifdef BLIS_ENABLE_PTHREADS
#ifdef BLIS_ENABLE_PTHREADS

// pthreads-specific function prototypes.
void bli_thrcomm_init_pthreads( dim_t nt, thrcomm_t* comm );
void bli_thrcomm_cleanup_pthreads( thrcomm_t* comm );
void bli_thrcomm_barrier_pthreads( dim_t tid, thrcomm_t* comm );
void bli_thrcomm_barrier_pthreads( dim_t tid, thrcomm_t* comm, const char* tag );

#endif

Expand Down
2 changes: 1 addition & 1 deletion frame/thread/bli_thrcomm_single.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void bli_thrcomm_cleanup_single( thrcomm_t* comm )
if ( comm == NULL ) return;
}

void bli_thrcomm_barrier_single( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_barrier_single( dim_t t_id, thrcomm_t* comm, const char* tag )
{
return;
}
Expand Down
Loading