Skip to content

Commit 5dd51ad

Browse files
committed
Recycle task groups, which at the same time fixes (caps) the maximum nesting depth of coforall and cobegin blocks
1 parent b764626 commit 5dd51ad

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

runtime/src/tasks/atmi/tasks-atmi.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@
6767
#include <unistd.h>
6868
#include <math.h>
6969

70+
// FIXME: Good idea to recycle the task groups, so this will limit
71+
// the depth of nested coforalls and cobegins. Fix this to use an ATMI
72+
// environment variable.
73+
#define max_num_task_groups 8
7074
#define max_num_cpu_kernels 4096
7175
int cpu_kernels_initialized[max_num_cpu_kernels] = {0};
7276
atmi_kernel_t cpu_kernels[max_num_cpu_kernels];
@@ -759,6 +763,11 @@ void chpl_task_init(void)
759763
atmi_kernel_create_empty(&dummy_kernel, 0, NULL);
760764
atmi_kernel_add_cpu_impl(dummy_kernel, (atmi_generic_fp)dummy_wrapper, CPU_FUNCTION_IMPL);
761765

766+
// this increment is needed to let the main task not be treated as an ATMI task
767+
// because we directly launch the main task with the main thread and don't treat
768+
// it as an ATMI task. This increment fools the rest of the runtime into thinking that
769+
// the main task is an ATMI task, but it is in fact not an ATMI task.
770+
int next_id = atomic_fetch_add_explicit_uint_least64_t(&atmi_tg_id, 1, memory_order_relaxed);
762771
atmi_task_handle_t dummy_handle = atmi_task_create(dummy_kernel);
763772
int32_t commMaxThreads;
764773
int32_t hwpar;
@@ -968,15 +977,16 @@ int chpl_task_createCommTask(chpl_fn_p fn,
968977

969978
void *chpl_taskGroupGet() {
970979
void *ret = (void *)get_atmi_task_group();
971-
printf("Adding to task group: %p\n", ret);
972980
return ret;
973981
}
974982

975983
void *chpl_taskGroupInit(int lineno, int32_t filename) {
976984
atmi_task_group_t *tg = (atmi_task_group_t *)chpl_malloc(sizeof(atmi_task_group_t));
977985
// TODO: add to a list of task groups and free all of them at the very end.
978986
int next_id = atomic_fetch_add_explicit_uint_least64_t(&atmi_tg_id, 1, memory_order_relaxed);
979-
printf("Next task group ID: %d\n", next_id);
987+
// loop around the task groups.
988+
// FIXME: How will this affect the main task, which is not an ATMI task? Increment by 1 after this?
989+
next_id %= max_num_task_groups;
980990
tg->id = next_id;
981991
tg->ordered = ATMI_FALSE;
982992
return tg;
@@ -1002,7 +1012,6 @@ void chpl_taskGroupFinalize(void *tg) {
10021012
}
10031013
else {
10041014
// if I am not within a task (main task), simply sync
1005-
printf("Waiting for task group: %p\n", tg);
10061015
atmi_task_group_sync((atmi_task_group_t *)tg);
10071016
}
10081017
//chpl_free(tg);
@@ -1048,8 +1057,8 @@ void chpl_task_addToTaskList(chpl_fn_int_t fid,
10481057
ATMI_LPARM_CPU(lparm, cpu_id);
10491058
lparm->kernel_id = CPU_FUNCTION_IMPL;
10501059
//lparm->synchronous = ATMI_TRUE;
1060+
lparm->groupable = ATMI_TRUE;
10511061
if(task_group) {
1052-
lparm->groupable = ATMI_TRUE;
10531062
lparm->group = (atmi_task_group_t *)task_group;
10541063
}
10551064

@@ -1080,7 +1089,6 @@ static inline void taskCallBody(chpl_fn_int_t fid, chpl_fn_name fname, chpl_fn_p
10801089
c_sublocid_t subloc, chpl_bool serial_state,
10811090
int lineno, int32_t filename)
10821091
{
1083-
//printf("Adding %d fn to task call body\n", fid);
10841092
chpl_task_bundle_t *bundle = (chpl_task_bundle_t*) arg;
10851093

10861094
bundle->serial_state = serial_state;
@@ -1252,7 +1260,8 @@ uint32_t chpl_task_getMaxPar(void) {
12521260
// will decide itself how much parallelism to create across and
12531261
// within sublocales, if there are any.
12541262
//
1255-
return (uint32_t) 8;//qthread_num_workers();
1263+
return qthread_num_workers();
1264+
//return (uint32_t) g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count;
12561265
}
12571266

12581267
c_sublocid_t chpl_task_getNumSublocales(void)
@@ -1294,7 +1303,8 @@ int32_t chpl_task_getNumBlockedTasks(void)
12941303

12951304
uint32_t chpl_task_getNumThreads(void)
12961305
{
1297-
return (uint32_t) 8;//qthread_num_workers();
1306+
return qthread_num_workers();
1307+
//return (uint32_t) g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count;
12981308
}
12991309

13001310
// Ew. Talk about excessive bookkeeping.

0 commit comments

Comments
 (0)