diff --git a/doc/tutorial.txt b/doc/tutorial.txt index 07dfa56e..b3a77324 100644 --- a/doc/tutorial.txt +++ b/doc/tutorial.txt @@ -276,25 +276,25 @@ inherit the affinity from the task level): util_min: Integer. Can be specified at task level or phase level. Sets the min utilization value the task can have; which helps boosting the task from -scheduler point of view. +scheduler point of view. Value -1 removes the clamp. util_max: Integer. Can be specified at task level or phase level. Sets the max utilization value the task can have; which helps capping the task from -scheduler point of view. +scheduler point of view. Value -1 removes the clamp. *** NUMA memory binding * nodes_membind: Array of Integer. Define the NUMA binding of the thread. -Default value is all NUMA nodes of the system. +Default value is all NUMA nodes of the system. An example : "nodes_membind" : [0, 2, 3] -"nodes_membind" Can be specified at task level or phase level. +"nodes_membind" Can be specified at task level or phase level. As an example, below creates two threads. -One thread will run with memory binding to nodes 2 and 3. -The second task will run first phase with memory binding to nodes 0 and 1, +One thread will run with memory binding to nodes 2 and 3. +The second task will run first phase with memory binding to nodes 0 and 1, second phase with memory binding to node 2. -Please note, that we follow an "event based policy", which means that -rt-app changes memory binding when there is a "nodes_membind" event +Please note, that we follow an "event based policy", which means that +rt-app changes memory binding when there is a "nodes_membind" event and don't do anything otherwise. "tasks" : { @@ -650,7 +650,7 @@ The generated events are of these main categories: - rtapp_loop: event=start thread_loop=0 phase=0 phase_loop=0 - rtapp_loop: event=end thread_loop=0 phase=0 phase_loop=0 - + Reporting the start and end of workload's phases and loops. * Workload's events, for example: @@ -756,5 +756,3 @@ below) 20000 ++--+----+---+----+---+---+----+---+----+--++ 494000 17560556057560556057560656065606756065606756075607 Loop start time [msec] - - diff --git a/src/rt-app.c b/src/rt-app.c index 673df444..0cbec6da 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -51,6 +51,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #define FORKS_LIMIT 1024 + +/* + * Maximum number of CPUs rt-app will be able to detect. + */ +#define MAX_CPUS 1024 + static volatile sig_atomic_t continue_running; static pthread_data_t *threads; static int nthreads; @@ -152,6 +158,7 @@ static int thread_data_create_unique_resources(thread_data_t *tdata, const threa static int create_thread(const thread_data_t *td, int index, int forked, int nforks) { thread_data_t *tdata; + sched_data_t *tsched_data; if (!td) { log_error("Failed to create new thread, passed NULL thread_data_t: %s", td->name); @@ -164,14 +171,25 @@ static int create_thread(const thread_data_t *td, int index, int forked, int nfo return -1; } + tsched_data = malloc(sizeof(sched_data_t)); + if (!tsched_data) { + log_error("Failed to duplicate thread sched data: %s", td->name); + return -1; + } + /* * We have one tdata created at config parse, but we * might spawn multiple threads if we were running in * a loop, so ensure we duplicate the tdata before * creating each thread, and we should free it at the - * end of the thread_body() + * end of the thread_body(). + * + * Also duplicate the sched_data since it is modified by each thread to + * keep track of current sched state. */ memcpy(tdata, td, sizeof(*tdata)); + tdata->sched_data = tsched_data; + memcpy(tdata->sched_data, td->sched_data, sizeof(*tdata->sched_data)); /* Mark this thread as forked */ tdata->forked = forked; @@ -733,6 +751,13 @@ static void __shutdown(bool force_terminate) { /* clean up tdata if this was a forked thread */ free(threads[i].data->name); + free(threads[i].data->sched_data); + if (threads[i].data->def_cpu_data.cpuset) + CPU_FREE(threads[i].data->def_cpu_data.cpuset); + + if (threads[i].data->def_cpu_data.cpuset_str) + free(threads[i].data->def_cpu_data.cpuset_str); + free(threads[i].data); } @@ -793,7 +818,7 @@ static int create_cpuset_str(cpuset_data_t *cpu_data) for (i = 0; i < 10000 && cpu_count; ++i) { unsigned int n; - if (CPU_ISSET(i, cpu_data->cpuset)) { + if (CPU_ISSET_S(cpu_data->cpusetsize, i, cpu_data->cpuset)) { --cpu_count; if (size_needed <= (idx + 1)) { log_error("Not enough memory for array"); @@ -830,21 +855,20 @@ static void set_thread_affinity(thread_data_t *data, cpuset_data_t *cpu_data) if (data->def_cpu_data.cpuset == NULL) { /* Get default affinity */ - cpu_set_t cpuset; - unsigned int cpu_count; + data->def_cpu_data.cpusetsize = CPU_ALLOC_SIZE(MAX_CPUS); + data->def_cpu_data.cpuset = CPU_ALLOC(MAX_CPUS); + if (!data->def_cpu_data.cpuset) { + perror("cpu_set_t malloc"); + exit(EXIT_FAILURE); + } ret = pthread_getaffinity_np(pthread_self(), - sizeof(cpu_set_t), &cpuset); + data->def_cpu_data.cpusetsize, data->def_cpu_data.cpuset); if (ret != 0) { errno = ret; perror("pthread_get_affinity"); exit(EXIT_FAILURE); } - cpu_count = CPU_COUNT(&cpuset); - data->def_cpu_data.cpusetsize = CPU_ALLOC_SIZE(cpu_count); - data->def_cpu_data.cpuset = CPU_ALLOC(cpu_count); - memcpy(data->def_cpu_data.cpuset, &cpuset, - data->def_cpu_data.cpusetsize); create_cpuset_str(&data->def_cpu_data); data->curr_cpu_data = &data->def_cpu_data; } @@ -861,8 +885,12 @@ static void set_thread_affinity(thread_data_t *data, cpuset_data_t *cpu_data) if (actual_cpu_data->cpuset == NULL) actual_cpu_data = &data->def_cpu_data; - if (!CPU_EQUAL(actual_cpu_data->cpuset, data->curr_cpu_data->cpuset)) - { + if ( + actual_cpu_data->cpusetsize != data->curr_cpu_data->cpusetsize || + !CPU_EQUAL_S( + actual_cpu_data->cpusetsize, + actual_cpu_data->cpuset, data->curr_cpu_data->cpuset)) + { log_debug("[%d] setting cpu affinity to CPU(s) %s", data->ind, actual_cpu_data->cpuset_str); ret = pthread_setaffinity_np(pthread_self(), @@ -901,12 +929,17 @@ static void set_thread_membind(thread_data_t *data, numaset_data_t * numa_data) * sched_priority is only meaningful for RT tasks. Otherwise, it must be * set to 0 for the setattr syscall to succeed. */ -static int __sched_priority(thread_data_t *data, sched_data_t *sched_data) +static int __sched_priority(thread_data_t *data, sched_data_t *sched_data, int curr_prio) { + int prio = sched_data->prio; + + if (prio == THREAD_PRIORITY_UNCHANGED) + prio = curr_prio; + switch (sched_data->policy) { case rr: case fifo: - return sched_data->prio; + return prio; } return 0; @@ -932,7 +965,13 @@ static bool __set_thread_policy_priority(thread_data_t *data, struct sched_param param; int ret; - param.sched_priority = __sched_priority(data, sched_data); + if (sched_data->prio == THREAD_PRIORITY_UNCHANGED) { + log_error("Cannot resolve the priority of the thread %s", + data->name); + exit(EXIT_FAILURE); + } + + param.sched_priority = __sched_priority(data, sched_data, sched_data->prio); ret = pthread_setschedparam(pthread_self(), sched_data->policy, @@ -1000,12 +1039,20 @@ static void _set_thread_rt(thread_data_t *data, sched_data_t *sched_data) } /* deadline can't rely on the default __set_thread_policy_priority */ -static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data) +static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data, + sched_data_t *curr_sched_data) { struct sched_attr sa_params = {0}; unsigned int flags = 0; pid_t tid; int ret; + int curr_prio; + + if (curr_sched_data) + curr_prio = curr_sched_data->prio; + else + /* The value does not matter as it will not be used */ + curr_prio = THREAD_PRIORITY_UNCHANGED; log_debug("[%d] setting scheduler %s exec %lu, deadline %lu" " period %lu", @@ -1023,7 +1070,7 @@ static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data) sa_params.size = sizeof(struct sched_attr); sa_params.sched_flags = 0; sa_params.sched_policy = SCHED_DEADLINE; - sa_params.sched_priority = __sched_priority(data, sched_data); + sa_params.sched_priority = __sched_priority(data, sched_data, curr_prio); sa_params.sched_runtime = sched_data->runtime; sa_params.sched_deadline = sched_data->deadline; sa_params.sched_period = sched_data->period; @@ -1038,24 +1085,44 @@ static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data) } } -static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data) +static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data, sched_data_t *curr_sched_data) { struct sched_attr sa_params = {0}; unsigned int flags = 0; pid_t tid; int ret; + policy_t policy; + int curr_prio; - if ((sched_data->util_min == -1 && - sched_data->util_max == -1)) + if ((sched_data->util_min == -2 && + sched_data->util_max == -2)) return; - sa_params.sched_policy = sched_data->policy; - sa_params.sched_priority = __sched_priority(data, sched_data); + if (curr_sched_data) { + if (sched_data->policy == same) + policy = curr_sched_data->policy; + else + policy = sched_data->policy; + + curr_prio = curr_sched_data->prio; + } else { + curr_prio = THREAD_PRIORITY_UNCHANGED; + } + + + if (policy == same) { + log_error("Cannot resolve the policy of the thread %s", + data->name); + exit(EXIT_FAILURE); + } + + sa_params.sched_policy = policy; + sa_params.sched_priority = __sched_priority(data, sched_data, curr_prio); sa_params.size = sizeof(struct sched_attr); sa_params.sched_flags = SCHED_FLAG_KEEP_ALL; tid = gettid(); - if (sched_data->util_min != -1) { + if (sched_data->util_min != -2) { sa_params.sched_util_min = sched_data->util_min; sa_params.sched_flags |= SCHED_FLAG_UTIL_CLAMP_MIN; log_debug("[%d] setting util_min=%d", @@ -1064,7 +1131,7 @@ static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data) "rtapp_attrs: event=uclamp util_min=%d", sched_data->util_min); } - if (sched_data->util_max != -1) { + if (sched_data->util_max != -2) { sa_params.sched_util_max = sched_data->util_max; sa_params.sched_flags |= SCHED_FLAG_UTIL_CLAMP_MAX; log_debug("[%d] setting util_max=%d", @@ -1086,30 +1153,37 @@ static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data) static void set_thread_param(thread_data_t *data, sched_data_t *sched_data) { + sched_data_t *curr_sched_data = data->curr_sched_data; + if (!sched_data) return; - if (data->curr_sched_data == sched_data) - return; + if (curr_sched_data) { + if (curr_sched_data == sched_data) + return; - /* if no policy is specified, reuse the previous one */ - if ((sched_data->policy == same) && data->curr_sched_data) - sched_data->policy = data->curr_sched_data->policy; + if (sched_data->prio == curr_sched_data->prio) + sched_data->prio = THREAD_PRIORITY_UNCHANGED; + + /* if no policy is specified, reuse the previous one */ + if (sched_data->policy == same) + sched_data->policy = curr_sched_data->policy; + } switch (sched_data->policy) { case rr: case fifo: _set_thread_rt(data, sched_data); - _set_thread_uclamp(data, sched_data); + _set_thread_uclamp(data, sched_data, curr_sched_data); break; case other: case idle: _set_thread_cfs(data, sched_data); - _set_thread_uclamp(data, sched_data); + _set_thread_uclamp(data, sched_data, curr_sched_data); data->lock_pages = 0; /* forced off */ break; case deadline: - _set_thread_deadline(data, sched_data); + _set_thread_deadline(data, sched_data, curr_sched_data); break; default: log_error("Unknown scheduling policy %d", @@ -1117,6 +1191,13 @@ static void set_thread_param(thread_data_t *data, sched_data_t *sched_data) exit(EXIT_FAILURE); } + /* Ensure we have an actual valid priority in curr_sched_data at all + * time, since it could be needed in a later phase for sched_setattr() + * syscall + */ + if (sched_data->prio == THREAD_PRIORITY_UNCHANGED) + sched_data->prio = curr_sched_data->prio; + data->curr_sched_data = sched_data; } diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index 3fc699bb..e228c698 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -764,17 +764,17 @@ static void parse_cpuset_data(struct json_object *obj, cpuset_data_t *data) data->cpuset_str = strdup(json_object_to_json_string(cpuset_obj)); data->cpusetsize = sizeof(cpu_set_t); data->cpuset = malloc(data->cpusetsize); - CPU_ZERO(data->cpuset); + CPU_ZERO_S(data->cpusetsize, data->cpuset); for (i = 0; i < json_object_array_length(cpuset_obj); i++) { cpu = json_object_array_get_idx(cpuset_obj, i); cpu_idx = json_object_get_int(cpu); if (cpu_idx > max_cpu) { log_critical(PIN2 "Invalid cpu %u in cpuset %s", cpu_idx, data->cpuset_str); - free(data->cpuset); + CPU_FREE(data->cpuset); free(data->cpuset_str); exit(EXIT_INV_CONFIG); } - CPU_SET(cpu_idx, data->cpuset); + CPU_SET_S(cpu_idx, data->cpusetsize, data->cpuset); } } else { data->cpuset_str = strdup("-"); @@ -875,14 +875,14 @@ static sched_data_t *parse_sched_data(struct json_object *obj, int def_policy) tmp_data.period = get_int_value_from(obj, "dl-period", TRUE, tmp_data.runtime); tmp_data.deadline = get_int_value_from(obj, "dl-deadline", TRUE, tmp_data.period); - /* clamping params (-1: no changes ) */ - tmp_data.util_min = get_int_value_from(obj, "util_min", TRUE, -1); - if (tmp_data.util_min != -1 && tmp_data.util_min > 1024) { + /* clamping params (-2: no changes ) */ + tmp_data.util_min = get_int_value_from(obj, "util_min", TRUE, -2); + if (tmp_data.util_min < -2 || tmp_data.util_min > 1024) { log_critical(PIN2 "Invalid util_min %d (>1024)", tmp_data.util_min); exit(EXIT_INV_CONFIG); } - tmp_data.util_max = get_int_value_from(obj, "util_max", TRUE, -1); - if (tmp_data.util_max != -1 && tmp_data.util_max > 1024) { + tmp_data.util_max = get_int_value_from(obj, "util_max", TRUE, -2); + if (tmp_data.util_max < -2 || tmp_data.util_max > 1024) { log_critical(PIN2 "Invalid util_max %d (>1024)", tmp_data.util_max); exit(EXIT_INV_CONFIG); } @@ -905,7 +905,7 @@ static sched_data_t *parse_sched_data(struct json_object *obj, int def_policy) /* Check if we found at least one meaningful scheduler parameter */ if (tmp_data.prio != THREAD_PRIORITY_UNCHANGED || tmp_data.runtime || tmp_data.period || tmp_data.deadline || - tmp_data.util_min != -1 || tmp_data.util_max != -1) { + tmp_data.util_min != -2 || tmp_data.util_max != -2) { sched_data_t *new_data; /* At least 1 parameters has been set in the object */