const char      *time_str;
        struct perf_time_interval ptime;
        struct perf_time_interval hist_time;
+       volatile bool   thread_funcs_exit;
 };
 
 /* per thread run time data */
        prctl(PR_SET_NAME, comm2);
        if (fd < 0)
                return NULL;
-again:
-       ret = sem_post(&this_task->ready_for_work);
-       BUG_ON(ret);
-       mutex_lock(&sched->start_work_mutex);
-       mutex_unlock(&sched->start_work_mutex);
 
-       cpu_usage_0 = get_cpu_usage_nsec_self(fd);
+       while (!sched->thread_funcs_exit) {
+               ret = sem_post(&this_task->ready_for_work);
+               BUG_ON(ret);
+               mutex_lock(&sched->start_work_mutex);
+               mutex_unlock(&sched->start_work_mutex);
 
-       for (i = 0; i < this_task->nr_events; i++) {
-               this_task->curr_event = i;
-               perf_sched__process_event(sched, this_task->atoms[i]);
-       }
+               cpu_usage_0 = get_cpu_usage_nsec_self(fd);
 
-       cpu_usage_1 = get_cpu_usage_nsec_self(fd);
-       this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
-       ret = sem_post(&this_task->work_done_sem);
-       BUG_ON(ret);
+               for (i = 0; i < this_task->nr_events; i++) {
+                       this_task->curr_event = i;
+                       perf_sched__process_event(sched, this_task->atoms[i]);
+               }
 
-       mutex_lock(&sched->work_done_wait_mutex);
-       mutex_unlock(&sched->work_done_wait_mutex);
+               cpu_usage_1 = get_cpu_usage_nsec_self(fd);
+               this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
+               ret = sem_post(&this_task->work_done_sem);
+               BUG_ON(ret);
 
-       goto again;
+               mutex_lock(&sched->work_done_wait_mutex);
+               mutex_unlock(&sched->work_done_wait_mutex);
+       }
+       return NULL;
 }
 
 static void create_tasks(struct perf_sched *sched)
+       EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
+       EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
 {
        struct task_desc *task;
        pthread_attr_t attr;
 }
 
 static void wait_for_tasks(struct perf_sched *sched)
+       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
+       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
 {
        u64 cpu_usage_0, cpu_usage_1;
        struct task_desc *task;
 }
 
 static void run_one_test(struct perf_sched *sched)
+       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
+       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
 {
        u64 T0, T1, delta, avg_delta, fluct;
 
        print_task_traces(sched);
        add_cross_task_wakeups(sched);
 
+       sched->thread_funcs_exit = false;
        create_tasks(sched);
        printf("------------------------------------------------------------\n");
        for (i = 0; i < sched->replay_repeat; i++)
                run_one_test(sched);
 
+       sched->thread_funcs_exit = true;
+       mutex_unlock(&sched->start_work_mutex);
+       mutex_unlock(&sched->work_done_wait_mutex);
        return 0;
 }