Commit 86df587

Improve scalability of CountTasksPerNode().
Improve the scalability of CountTasksPerNode() by using a Broadcast and an AllReduce, rather than flooding task zero with MPI_Send() messages. Also change the hostname lookup function from MPI_Get_processor_name() to gethostname(), which should work on most systems that I know of, including BlueGene/Q.
1 parent 8426465 commit 86df587
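
The pattern the commit message describes (look up the local hostname with gethostname(), broadcast task 0's hostname to all tasks, then sum a per-task match flag with an Allreduce) can be sketched as a minimal standalone MPI program. This is an illustrative sketch, not the patched ior.c: the buffer length HOST_LEN, the variable names, and the use of MPI_COMM_WORLD are assumptions for the example.

/*
 * Minimal sketch of the Bcast + Allreduce tasks-per-node count.
 * Illustrative only; HOST_LEN and the variable names are not from IOR.
 */
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define HOST_LEN 256

int main(int argc, char **argv)
{
        char localhost[HOST_LEN];
        char hostname0[HOST_LEN];
        unsigned flag, count;
        int rank;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* each task looks up its own hostname */
        if (gethostname(localhost, HOST_LEN) == -1)
                localhost[0] = '\0';

        /* task 0 broadcasts its hostname to every task */
        if (rank == 0)
                strcpy(hostname0, localhost);
        MPI_Bcast(hostname0, HOST_LEN, MPI_CHAR, 0, MPI_COMM_WORLD);

        /* each task contributes 1 if it shares task 0's host, else 0;
           the sum is the number of tasks on task 0's node */
        flag = (strcmp(hostname0, localhost) == 0) ? 1 : 0;
        MPI_Allreduce(&flag, &count, 1, MPI_UNSIGNED, MPI_SUM,
                      MPI_COMM_WORLD);

        if (rank == 0)
                printf("tasks per node (assuming equal counts): %u\n", count);

        MPI_Finalize();
        return 0;
}

Because the exchange is two collectives regardless of job size, it avoids the O(numTasks) stream of point-to-point messages into task 0 that the old code relied on.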

File tree

1 file changed (+47, -30 lines)


src/ior.c

Lines changed: 47 additions & 30 deletions
@@ -449,46 +449,63 @@ static int CountErrors(IOR_param_t * test, int access, int errors)
 }
 
 /*
- * Compares hostnames to determine the number of tasks per node
+ * Count the number of tasks that share a host.
+ *
+ * This function employs the gethostname() call, rather than using
+ * MPI_Get_processor_name().  We are interested in knowing the number
+ * of tasks that share a file system client (I/O node, compute node,
+ * whatever that may be).  However, on machines like BlueGene/Q,
+ * MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
+ * not the node where the I/O is function-shipped to.  gethostname()
+ * is assumed to identify the shared filesystem client in more situations.
+ *
+ * NOTE: This also assumes that the task count on all nodes is equal
+ * to the task count on the host running MPI task 0.
  */
 static int CountTasksPerNode(int numTasks, MPI_Comm comm)
 {
-        char localhost[MAX_STR], hostname[MAX_STR], taskOnNode[MAX_STR];
-        int count = 1, resultsLen = MAX_STR, i;
+        char localhost[MAX_STR];
+        char hostname0[MAX_STR];
         static int firstPass = TRUE;
-        MPI_Status status;
-
-        MPI_CHECK(MPI_Get_processor_name(localhost, &resultsLen),
-                  "cannot get processor name");
+        unsigned count;
+        unsigned flag;
+        int rc;
 
         if (verbose >= VERBOSE_2 && firstPass) {
-                sprintf(taskOnNode, "task %d on %s", rank, localhost);
-                OutputToRoot(numTasks, comm, taskOnNode);
+                char tmp[MAX_STR];
+                sprintf(tmp, "task %d on %s", rank, localhost);
+                OutputToRoot(numTasks, comm, tmp);
                 firstPass = FALSE;
         }
 
-        if (numTasks > 1) {
-                if (rank == 0) {
-                        /* MPI_receive all hostnames, and compare to local hostname */
-                        for (i = 0; i < numTasks - 1; i++) {
-                                MPI_CHECK(MPI_Recv
-                                          (hostname, MAX_STR, MPI_CHAR,
-                                           MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
-                                           &status),
-                                          "cannot receive hostnames");
-                                if (strcmp(hostname, localhost) == 0)
-                                        count++;
-                        }
-                } else {
-                        /* MPI_send hostname to root node */
-                        MPI_CHECK(MPI_Send(localhost, MAX_STR, MPI_CHAR, 0, 0,
-                                           comm), "cannot send hostname");
-                }
-                MPI_CHECK(MPI_Bcast(&count, 1, MPI_INT, 0, comm),
-                          "cannot broadcast tasks-per-node value");
-        }
+        rc = gethostname(localhost, MAX_STR);
+        if (rc == -1) {
+                /* This node won't match task 0's hostname...except in the
+                   case where ALL gethostname() calls fail, in which
+                   case ALL nodes will appear to be on the same node.
+                   We'll handle that later. */
+                localhost[0] = '\0';
+                if (rank == 0)
+                        perror("gethostname() failed");
+        }
+
+        /* send task 0's hostname to all tasks */
+        if (rank == 0)
+                strcpy(hostname0, localhost);
+        MPI_CHECK(MPI_Bcast(hostname0, MAX_STR, MPI_CHAR, 0, comm),
+                  "broadcast of task 0's hostname failed");
+        if (strcmp(hostname0, localhost) == 0)
+                flag = 1;
+        else
+                flag = 0;
+
+        /* count the tasks that share the same host as task 0 */
+        MPI_Allreduce(&flag, &count, 1, MPI_UNSIGNED, MPI_SUM, comm);
+
+        if (hostname0[0] == '\0')
+                count = 1;
 
-        return (count);
+        return (int)count;
 }
 
 /*
