@@ -449,46 +449,63 @@ static int CountErrors(IOR_param_t * test, int access, int errors)
449449}
450450
451451/*
452- * Compares hostnames to determine the number of tasks per node
452+ * Count the number of tasks that share a host.
453+ *
454+ * This function employees the gethostname() call, rather than using
455+ * MPI_Get_processor_name(). We are interested in knowing the number
456+ * of tasks that share a file system client (I/O node, compute node,
457+ * whatever that may be). However on machines like BlueGene/Q,
458+ * MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
459+ * not the node where the I/O is function shipped to. gethostname()
460+ * is assumed to identify the shared filesystem client in more situations.
461+ *
462+ * NOTE: This also assumes that the task count on all nodes is equal
463+ * to the task count on the host running MPI task 0.
453464 */
454465static int CountTasksPerNode (int numTasks , MPI_Comm comm )
455466{
456- char localhost [MAX_STR ], hostname [ MAX_STR ], taskOnNode [ MAX_STR ] ;
457- int count = 1 , resultsLen = MAX_STR , i ;
467+ char localhost [MAX_STR ];
468+ char hostname0 [ MAX_STR ] ;
458469 static int firstPass = TRUE;
459- MPI_Status status ;
460-
461- MPI_CHECK (MPI_Get_processor_name (localhost , & resultsLen ),
462- "cannot get processor name" );
470+ unsigned count ;
471+ unsigned flag ;
472+ int rc ;
463473
464474 if (verbose >= VERBOSE_2 && firstPass ) {
465- sprintf (taskOnNode , "task %d on %s" , rank , localhost );
466- OutputToRoot (numTasks , comm , taskOnNode );
475+ char tmp [MAX_STR ];
476+ sprintf (tmp , "task %d on %s" , rank , localhost );
477+ OutputToRoot (numTasks , comm , tmp );
467478 firstPass = FALSE;
468479 }
469480
470- if (numTasks > 1 ) {
471- if (rank == 0 ) {
472- /* MPI_receive all hostnames, and compare to local hostname */
473- for (i = 0 ; i < numTasks - 1 ; i ++ ) {
474- MPI_CHECK (MPI_Recv
475- (hostname , MAX_STR , MPI_CHAR ,
476- MPI_ANY_SOURCE , MPI_ANY_TAG , comm ,
477- & status ),
478- "cannot receive hostnames" );
479- if (strcmp (hostname , localhost ) == 0 )
480- count ++ ;
481- }
482- } else {
483- /* MPI_send hostname to root node */
484- MPI_CHECK (MPI_Send (localhost , MAX_STR , MPI_CHAR , 0 , 0 ,
485- comm ), "cannot send hostname" );
486- }
487- MPI_CHECK (MPI_Bcast (& count , 1 , MPI_INT , 0 , comm ),
488- "cannot broadcast tasks-per-node value" );
489- }
481+ rc = gethostname (localhost , MAX_STR );
482+ if (rc == -1 ) {
483+ /* This node won't match task 0's hostname...expect in the
484+ case where ALL gethostname() calls fail, in which
485+ case ALL nodes will appear to be on the same node.
486+ We'll handle that later. */
487+ localhost [0 ] = '\0' ;
488+ if (rank == 0 )
489+ perror ("gethostname() failed" );
490+ }
491+
492+ /* send task 0's hostname to all tasks */
493+ if (rank == 0 )
494+ strcpy (hostname0 , localhost );
495+ MPI_CHECK (MPI_Bcast (hostname0 , MAX_STR , MPI_CHAR , 0 , comm ),
496+ "broadcast of task 0's hostname failed" );
497+ if (strcmp (hostname0 , localhost ) == 0 )
498+ flag = 1 ;
499+ else
500+ flag = 0 ;
501+
502+ /* count the tasks share the same host as task 0 */
503+ MPI_Allreduce (& flag , & count , 1 , MPI_UNSIGNED , MPI_SUM , comm );
504+
505+ if (hostname0 [0 ] == '\0' )
506+ count = 1 ;
490507
491- return (count ) ;
508+ return (int ) count ;
492509}
493510
494511/*
0 commit comments