6767#include <unistd.h>
6868#include <math.h>
6969
70+ // FIXME: Recycling the task groups is a good idea, but it limits the
71+ // depth of nested coforalls and cobegins. Make this limit configurable
72+ // through an ATMI environment variable.
73+ #define max_num_task_groups 8
7074#define max_num_cpu_kernels 4096
7175int cpu_kernels_initialized [max_num_cpu_kernels ] = {0 };
7276atmi_kernel_t cpu_kernels [max_num_cpu_kernels ];
@@ -759,6 +763,11 @@ void chpl_task_init(void)
759763 atmi_kernel_create_empty (& dummy_kernel , 0 , NULL );
760764 atmi_kernel_add_cpu_impl (dummy_kernel , (atmi_generic_fp )dummy_wrapper , CPU_FUNCTION_IMPL );
761765
766+ // This increment is needed because the main task is launched directly on
767+ // the main thread and is therefore not an ATMI task. Bumping the counter
768+ // here makes the rest of the runtime treat the main task as if it were an
769+ // ATMI task, even though it is not.
770+ int next_id = atomic_fetch_add_explicit_uint_least64_t (& atmi_tg_id , 1 , memory_order_relaxed );
762771 atmi_task_handle_t dummy_handle = atmi_task_create (dummy_kernel );
763772 int32_t commMaxThreads ;
764773 int32_t hwpar ;
@@ -968,15 +977,16 @@ int chpl_task_createCommTask(chpl_fn_p fn,
968977
// Return the current ATMI task group as an opaque pointer.
//
// Forwards to get_atmi_task_group() and hands its result back as a void
// pointer. No debug output is produced.
void *chpl_taskGroupGet() {
  return (void *)get_atmi_task_group();
}
974982
975983void * chpl_taskGroupInit (int lineno , int32_t filename ) {
976984 atmi_task_group_t * tg = (atmi_task_group_t * )chpl_malloc (sizeof (atmi_task_group_t ));
977985 // TODO: add to a list of task groups and free all of them at the very end.
978986 int next_id = atomic_fetch_add_explicit_uint_least64_t (& atmi_tg_id , 1 , memory_order_relaxed );
979- printf ("Next task group ID: %d\n" , next_id );
987+ // Wrap around so that task group IDs are recycled.
988+ // FIXME: How does wrapping affect the main task, which is not an ATMI task? Should the ID be incremented by 1 after this?
989+ next_id %= max_num_task_groups ;
980990 tg -> id = next_id ;
981991 tg -> ordered = ATMI_FALSE ;
982992 return tg ;
@@ -1002,7 +1012,6 @@ void chpl_taskGroupFinalize(void *tg) {
10021012 }
10031013 else {
10041014 // if I am not within a task (main task), simply sync
1005- printf ("Waiting for task group: %p\n" , tg );
10061015 atmi_task_group_sync ((atmi_task_group_t * )tg );
10071016 }
10081017 //chpl_free(tg);
@@ -1048,8 +1057,8 @@ void chpl_task_addToTaskList(chpl_fn_int_t fid,
10481057 ATMI_LPARM_CPU (lparm , cpu_id );
10491058 lparm -> kernel_id = CPU_FUNCTION_IMPL ;
10501059 //lparm->synchronous = ATMI_TRUE;
1060+ lparm -> groupable = ATMI_TRUE ;
10511061 if (task_group ) {
1052- lparm -> groupable = ATMI_TRUE ;
10531062 lparm -> group = (atmi_task_group_t * )task_group ;
10541063 }
10551064
@@ -1080,7 +1089,6 @@ static inline void taskCallBody(chpl_fn_int_t fid, chpl_fn_name fname, chpl_fn_p
10801089 c_sublocid_t subloc , chpl_bool serial_state ,
10811090 int lineno , int32_t filename )
10821091{
1083- //printf("Adding %d fn to task call body\n", fid);
10841092 chpl_task_bundle_t * bundle = (chpl_task_bundle_t * ) arg ;
10851093
10861094 bundle -> serial_state = serial_state ;
@@ -1252,7 +1260,8 @@ uint32_t chpl_task_getMaxPar(void) {
12521260 // will decide itself how much parallelism to create across and
12531261 // within sublocales, if there are any.
12541262 //
1255- return (uint32_t ) 8 ;//qthread_num_workers();
1263+ return qthread_num_workers ();
1264+ //return (uint32_t) g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count;
12561265}
12571266
12581267c_sublocid_t chpl_task_getNumSublocales (void )
@@ -1294,7 +1303,8 @@ int32_t chpl_task_getNumBlockedTasks(void)
12941303
// Report the number of threads available to the tasking layer.
//
// Returns the result of qthread_num_workers(), converted to uint32_t.
//
// NOTE(review): an alternative source for this value is
// g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count; confirm which
// of the two this layer should report.
uint32_t chpl_task_getNumThreads(void)
{
  return (uint32_t) qthread_num_workers();
}
12991309
13001310// Ew. Talk about excessive bookkeeping.
0 commit comments