diff --git a/doc/tutorial.txt b/doc/tutorial.txt
index 07dfa56e..b3a77324 100644
--- a/doc/tutorial.txt
+++ b/doc/tutorial.txt
@@ -276,25 +276,25 @@ inherit the affinity from the task level):
 
 util_min: Integer. Can be specified at task level or phase level. Sets the min
 utilization value the task can have; which helps boosting the task from
-scheduler point of view.
+scheduler point of view. Value -1 removes the clamp.
 
 util_max: Integer. Can be specified at task level or phase level. Sets the max
 utilization value the task can have; which helps capping the task from
-scheduler point of view.
+scheduler point of view. Value -1 removes the clamp.
 
 *** NUMA memory binding
 
 * nodes_membind: Array of Integer. Define the NUMA binding of the thread.
-Default value is all NUMA nodes of the system. 
+Default value is all NUMA nodes of the system.
 An example : "nodes_membind" : [0, 2, 3]
 
-"nodes_membind" Can be specified at task level or phase level. 
+"nodes_membind" Can be specified at task level or phase level.
 As an example, below creates two threads.
-One thread will run with memory binding to nodes 2 and 3. 
-The second task will run first phase with memory binding to nodes 0 and 1, 
+One thread will run with memory binding to nodes 2 and 3.
+The second task will run first phase with memory binding to nodes 0 and 1,
 second phase with memory binding to node 2.
-Please note, that we follow an "event based policy", which means that 
-rt-app changes memory binding when there is a "nodes_membind" event 
+Please note, that we follow an "event based policy", which means that
+rt-app changes memory binding when there is a "nodes_membind" event
 and don't do anything otherwise.
 
 "tasks" : {
@@ -650,7 +650,7 @@ The generated events are of these main categories:
 
   - rtapp_loop: event=start thread_loop=0 phase=0 phase_loop=0
   - rtapp_loop: event=end thread_loop=0 phase=0 phase_loop=0
-  
+
   Reporting the start and end of workload's phases and loops.
 
 * Workload's events, for example:
@@ -756,5 +756,3 @@ below)
    20000 ++--+----+---+----+---+---+----+---+----+--++ 494000
        17560556057560556057560656065606756065606756075607
                     Loop start time [msec]
-
-
diff --git a/src/rt-app.c b/src/rt-app.c
index 673df444..0cbec6da 100644
--- a/src/rt-app.c
+++ b/src/rt-app.c
@@ -51,6 +51,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #define FORKS_LIMIT		1024
 
+
+/*
+ * Maximum number of CPUs rt-app will be able to detect.
+ */
+#define MAX_CPUS		1024
+
 static volatile sig_atomic_t continue_running;
 static pthread_data_t *threads;
 static int nthreads;
@@ -152,6 +158,7 @@ static int thread_data_create_unique_resources(thread_data_t *tdata, const threa
 static int create_thread(const thread_data_t *td, int index, int forked, int nforks)
 {
 	thread_data_t *tdata;
+	sched_data_t *tsched_data;
 
 	if (!td) {
 		log_error("Failed to create new thread, passed NULL thread_data_t: %s", td->name);
@@ -164,14 +171,25 @@ static int create_thread(const thread_data_t *td, int index, int forked, int nfo
 		return -1;
 	}
 
+	tsched_data = malloc(sizeof(sched_data_t));
+	if (!tsched_data) {
+		log_error("Failed to duplicate thread sched data: %s", td->name);
+		return -1;
+	}
+
 	/*
 	 * We have one tdata created at config parse, but we
 	 * might spawn multiple threads if we were running in
 	 * a loop, so ensure we duplicate the tdata before
 	 * creating each thread, and we should free it at the
-	 * end of the thread_body()
+	 * end of the thread_body().
+	 *
+	 * Also duplicate the sched_data since it is modified by each thread to
+	 * keep track of current sched state.
 	 */
 	memcpy(tdata, td, sizeof(*tdata));
+	tdata->sched_data = tsched_data;
+	memcpy(tdata->sched_data, td->sched_data, sizeof(*tdata->sched_data));
 
 	/* Mark this thread as forked */
 	tdata->forked = forked;
@@ -733,6 +751,13 @@ static void __shutdown(bool force_terminate)
 	{
 		/* clean up tdata if this was a forked thread */
 		free(threads[i].data->name);
+		free(threads[i].data->sched_data);
+		if (threads[i].data->def_cpu_data.cpuset)
+			CPU_FREE(threads[i].data->def_cpu_data.cpuset);
+
+		if (threads[i].data->def_cpu_data.cpuset_str)
+			free(threads[i].data->def_cpu_data.cpuset_str);
+
 		free(threads[i].data);
 	}
 
@@ -793,7 +818,7 @@ static int create_cpuset_str(cpuset_data_t *cpu_data)
 	for (i = 0; i < 10000 && cpu_count; ++i) {
 		unsigned int n;
 
-		if (CPU_ISSET(i, cpu_data->cpuset)) {
+		if (CPU_ISSET_S(cpu_data->cpusetsize, i, cpu_data->cpuset)) {
 			--cpu_count;
 			if (size_needed <= (idx + 1)) {
 				log_error("Not enough memory for array");
@@ -830,21 +855,20 @@ static void set_thread_affinity(thread_data_t *data, cpuset_data_t *cpu_data)
 
 	if (data->def_cpu_data.cpuset == NULL) {
 		/* Get default affinity */
-		cpu_set_t cpuset;
-		unsigned int cpu_count;
+		data->def_cpu_data.cpusetsize = CPU_ALLOC_SIZE(MAX_CPUS);
+		data->def_cpu_data.cpuset = CPU_ALLOC(MAX_CPUS);
+		if (!data->def_cpu_data.cpuset) {
+			perror("cpu_set_t malloc");
+			exit(EXIT_FAILURE);
+		}
 
 		ret = pthread_getaffinity_np(pthread_self(),
-						    sizeof(cpu_set_t), &cpuset);
+					     data->def_cpu_data.cpusetsize, data->def_cpu_data.cpuset);
 		if (ret != 0) {
 			errno = ret;
 			perror("pthread_get_affinity");
 			exit(EXIT_FAILURE);
 		}
-		cpu_count = CPU_COUNT(&cpuset);
-		data->def_cpu_data.cpusetsize = CPU_ALLOC_SIZE(cpu_count);
-		data->def_cpu_data.cpuset = CPU_ALLOC(cpu_count);
-		memcpy(data->def_cpu_data.cpuset, &cpuset,
-						data->def_cpu_data.cpusetsize);
 		create_cpuset_str(&data->def_cpu_data);
 		data->curr_cpu_data = &data->def_cpu_data;
 	}
@@ -861,8 +885,12 @@ static void set_thread_affinity(thread_data_t *data, cpuset_data_t *cpu_data)
 	if (actual_cpu_data->cpuset == NULL)
 		actual_cpu_data = &data->def_cpu_data;
 
-	if (!CPU_EQUAL(actual_cpu_data->cpuset, data->curr_cpu_data->cpuset))
-	{
+	if (
+	    actual_cpu_data->cpusetsize != data->curr_cpu_data->cpusetsize ||
+	    !CPU_EQUAL_S(
+			 actual_cpu_data->cpusetsize,
+			 actual_cpu_data->cpuset, data->curr_cpu_data->cpuset))
+	  {
 		log_debug("[%d] setting cpu affinity to CPU(s) %s", data->ind,
 			actual_cpu_data->cpuset_str);
 		ret = pthread_setaffinity_np(pthread_self(),
@@ -901,12 +929,17 @@ static void set_thread_membind(thread_data_t *data, numaset_data_t * numa_data)
  * sched_priority is only meaningful for RT tasks. Otherwise, it must be
  * set to 0 for the setattr syscall to succeed.
  */
-static int __sched_priority(thread_data_t *data, sched_data_t *sched_data)
+static int __sched_priority(thread_data_t *data, sched_data_t *sched_data, int curr_prio)
 {
+	int prio = sched_data->prio;
+
+	if (prio == THREAD_PRIORITY_UNCHANGED)
+		prio = curr_prio;
+
 	switch (sched_data->policy) {
 		case rr:
 		case fifo:
-			return sched_data->prio;
+			return prio;
 	}
 
 	 return 0;
@@ -932,7 +965,13 @@ static bool __set_thread_policy_priority(thread_data_t *data,
 	struct sched_param param;
 	int ret;
 
-	param.sched_priority = __sched_priority(data, sched_data);
+	if (sched_data->prio == THREAD_PRIORITY_UNCHANGED) {
+		log_error("Cannot resolve the priority of the thread %s",
+			  data->name);
+		exit(EXIT_FAILURE);
+	}
+
+	param.sched_priority = __sched_priority(data, sched_data, sched_data->prio);
 
 	ret = pthread_setschedparam(pthread_self(),
 				    sched_data->policy,
@@ -1000,12 +1039,20 @@ static void _set_thread_rt(thread_data_t *data, sched_data_t *sched_data)
 }
 
 /* deadline can't rely on the default __set_thread_policy_priority */
-static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data)
+static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data,
+				 sched_data_t *curr_sched_data)
 {
 	struct sched_attr sa_params = {0};
 	unsigned int flags = 0;
 	pid_t tid;
 	int ret;
+	int curr_prio;
+
+	if (curr_sched_data)
+		curr_prio = curr_sched_data->prio;
+	else
+		/* The value does not matter as it will not be used */
+		curr_prio = THREAD_PRIORITY_UNCHANGED;
 
 	log_debug("[%d] setting scheduler %s exec %lu, deadline %lu"
 		  " period %lu",
@@ -1023,7 +1070,7 @@ static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data)
 	sa_params.size = sizeof(struct sched_attr);
 	sa_params.sched_flags = 0;
 	sa_params.sched_policy = SCHED_DEADLINE;
-	sa_params.sched_priority = __sched_priority(data, sched_data);
+	sa_params.sched_priority = __sched_priority(data, sched_data, curr_prio);
 	sa_params.sched_runtime = sched_data->runtime;
 	sa_params.sched_deadline = sched_data->deadline;
 	sa_params.sched_period = sched_data->period;
@@ -1038,24 +1085,44 @@ static void _set_thread_deadline(thread_data_t *data, sched_data_t *sched_data)
 	}
 }
 
-static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data)
+static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data, sched_data_t *curr_sched_data)
 {
 	struct sched_attr sa_params = {0};
 	unsigned int flags = 0;
 	pid_t tid;
 	int ret;
+	policy_t policy;
+	int curr_prio;
 
-	if ((sched_data->util_min == -1 &&
-	     sched_data->util_max == -1))
+	if ((sched_data->util_min == -2 &&
+	     sched_data->util_max == -2))
 		    return;
 
-	sa_params.sched_policy = sched_data->policy;
-	sa_params.sched_priority = __sched_priority(data, sched_data);
+	if (curr_sched_data) {
+		if (sched_data->policy == same)
+			policy = curr_sched_data->policy;
+		else
+			policy = sched_data->policy;
+
+		curr_prio = curr_sched_data->prio;
+	} else {
+		curr_prio = THREAD_PRIORITY_UNCHANGED;
+	}
+
+
+	if (policy == same) {
+		log_error("Cannot resolve the policy of the thread %s",
+			  data->name);
+		exit(EXIT_FAILURE);
+	}
+
+	sa_params.sched_policy = policy;
+	sa_params.sched_priority = __sched_priority(data, sched_data, curr_prio);
 	sa_params.size = sizeof(struct sched_attr);
 	sa_params.sched_flags = SCHED_FLAG_KEEP_ALL;
 	tid = gettid();
 
-	if (sched_data->util_min != -1) {
+	if (sched_data->util_min != -2) {
 		sa_params.sched_util_min = sched_data->util_min;
 		sa_params.sched_flags |= SCHED_FLAG_UTIL_CLAMP_MIN;
 		log_debug("[%d] setting util_min=%d",
@@ -1064,7 +1131,7 @@ static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data)
 			   "rtapp_attrs: event=uclamp util_min=%d",
 			   sched_data->util_min);
 	}
-	if (sched_data->util_max != -1) {
+	if (sched_data->util_max != -2) {
 		sa_params.sched_util_max = sched_data->util_max;
 		sa_params.sched_flags |= SCHED_FLAG_UTIL_CLAMP_MAX;
 		log_debug("[%d] setting util_max=%d",
@@ -1086,30 +1153,37 @@ static void _set_thread_uclamp(thread_data_t *data, sched_data_t *sched_data)
 
 static void set_thread_param(thread_data_t *data, sched_data_t *sched_data)
 {
+	sched_data_t *curr_sched_data = data->curr_sched_data;
+
 	if (!sched_data)
 		return;
 
-	if (data->curr_sched_data == sched_data)
-		return;
+	if (curr_sched_data) {
+		if (curr_sched_data == sched_data)
+			return;
 
-	/* if no policy is specified, reuse the previous one */
-	if ((sched_data->policy == same) && data->curr_sched_data)
-		sched_data->policy = data->curr_sched_data->policy;
+		if (sched_data->prio == curr_sched_data->prio)
+			sched_data->prio = THREAD_PRIORITY_UNCHANGED;
+
+		/* if no policy is specified, reuse the previous one */
+		if (sched_data->policy == same)
+			sched_data->policy = curr_sched_data->policy;
+        }
 
 	switch (sched_data->policy) {
 		case rr:
 		case fifo:
 			_set_thread_rt(data, sched_data);
-			_set_thread_uclamp(data, sched_data);
+			_set_thread_uclamp(data, sched_data, curr_sched_data);
 			break;
 		case other:
 		case idle:
 			_set_thread_cfs(data, sched_data);
-			_set_thread_uclamp(data, sched_data);
+			_set_thread_uclamp(data, sched_data, curr_sched_data);
 			data->lock_pages = 0; /* forced off */
 			break;
 		case deadline:
-			_set_thread_deadline(data, sched_data);
+			_set_thread_deadline(data, sched_data, curr_sched_data);
 			break;
 		default:
 			log_error("Unknown scheduling policy %d",
@@ -1117,6 +1191,13 @@ static void set_thread_param(thread_data_t *data, sched_data_t *sched_data)
 			exit(EXIT_FAILURE);
 	}
 
+	/* Ensure we have an actual valid priority in curr_sched_data at all
+	 * time, since it could be needed in a later phase for sched_setattr()
+	 * syscall
+	 */
+	if (sched_data->prio == THREAD_PRIORITY_UNCHANGED)
+		sched_data->prio = curr_sched_data->prio;
+
 	data->curr_sched_data = sched_data;
 }
 
diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c
index 3fc699bb..e228c698 100644
--- a/src/rt-app_parse_config.c
+++ b/src/rt-app_parse_config.c
@@ -764,17 +764,17 @@ static void parse_cpuset_data(struct json_object *obj, cpuset_data_t *data)
 		data->cpuset_str = strdup(json_object_to_json_string(cpuset_obj));
 		data->cpusetsize = sizeof(cpu_set_t);
 		data->cpuset = malloc(data->cpusetsize);
-		CPU_ZERO(data->cpuset);
+		CPU_ZERO_S(data->cpusetsize, data->cpuset);
 		for (i = 0; i < json_object_array_length(cpuset_obj); i++) {
 			cpu = json_object_array_get_idx(cpuset_obj, i);
 			cpu_idx = json_object_get_int(cpu);
 			if (cpu_idx > max_cpu) {
 				log_critical(PIN2 "Invalid cpu %u in cpuset %s", cpu_idx, data->cpuset_str);
-				free(data->cpuset);
+				CPU_FREE(data->cpuset);
 				free(data->cpuset_str);
 				exit(EXIT_INV_CONFIG);
 			}
-			CPU_SET(cpu_idx, data->cpuset);
+			CPU_SET_S(cpu_idx, data->cpusetsize, data->cpuset);
 		}
 	} else {
 		data->cpuset_str = strdup("-");
@@ -875,14 +875,14 @@ static sched_data_t *parse_sched_data(struct json_object *obj, int def_policy)
 	tmp_data.period = get_int_value_from(obj, "dl-period", TRUE, tmp_data.runtime);
 	tmp_data.deadline = get_int_value_from(obj, "dl-deadline", TRUE, tmp_data.period);
 
-	/* clamping params (-1: no changes ) */
-	tmp_data.util_min = get_int_value_from(obj, "util_min", TRUE, -1);
-	if (tmp_data.util_min != -1 && tmp_data.util_min > 1024) {
+	/* clamping params (-2: no changes ) */
+	tmp_data.util_min = get_int_value_from(obj, "util_min", TRUE, -2);
+	if (tmp_data.util_min < -2 || tmp_data.util_min > 1024) {
 		log_critical(PIN2 "Invalid util_min %d (>1024)", tmp_data.util_min);
 		exit(EXIT_INV_CONFIG);
 	}
-	tmp_data.util_max = get_int_value_from(obj, "util_max", TRUE, -1);
-	if (tmp_data.util_max != -1 && tmp_data.util_max > 1024) {
+	tmp_data.util_max = get_int_value_from(obj, "util_max", TRUE, -2);
+	if (tmp_data.util_max < -2 || tmp_data.util_max > 1024) {
 		log_critical(PIN2 "Invalid util_max %d (>1024)", tmp_data.util_max);
 		exit(EXIT_INV_CONFIG);
 	}
@@ -905,7 +905,7 @@ static sched_data_t *parse_sched_data(struct json_object *obj, int def_policy)
 	/* Check if we found at least one meaningful scheduler parameter */
 	if (tmp_data.prio != THREAD_PRIORITY_UNCHANGED ||
 	    tmp_data.runtime || tmp_data.period || tmp_data.deadline ||
-	    tmp_data.util_min != -1 || tmp_data.util_max != -1) {
+	    tmp_data.util_min != -2 || tmp_data.util_max != -2) {
 		sched_data_t *new_data;
 
 		/* At least 1 parameters has been set in the object */