From 73fe6aae84400e2b475e2a1dc4e8592cd3ed6e69 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Fri, 25 Jan 2008 21:08:07 +0100 Subject: sched: add RT-balance cpu-weight Some RT tasks (particularly kthreads) are bound to one specific CPU. It is fairly common for two or more bound tasks to get queued up at the same time. Consider, for instance, softirq_timer and softirq_sched. A timer goes off in an ISR which schedules softirq_thread to run at RT50. Then the timer handler determines that it's time to smp-rebalance the system so it schedules softirq_sched to run. So we are in a situation where we have two RT50 tasks queued, and the system will go into rt-overload condition to request other CPUs for help. This causes two problems in the current code: 1) If a high-priority bound task and a low-priority unbounded task queue up behind the running task, we will fail to ever relocate the unbounded task because we terminate the search on the first unmovable task. 2) We spend precious futile cycles in the fast-path trying to pull overloaded tasks over. It is therefore optimial to strive to avoid the overhead all together if we can cheaply detect the condition before overload even occurs. This patch tries to achieve this optimization by utilizing the hamming weight of the task->cpus_allowed mask. A weight of 1 indicates that the task cannot be migrated. We will then utilize this information to skip non-migratable tasks and to eliminate uncessary rebalance attempts. We introduce a per-rq variable to count the number of migratable tasks that are currently running. We only go into overload if we have more than one rt task, AND at least one of them is migratable. In addition, we introduce a per-task variable to cache the cpus_allowed weight, since the hamming calculation is probably relatively expensive. We only update the cached value when the mask is updated which should be relatively infrequent, especially compared to scheduling frequency in the fast path. Signed-off-by: Gregory Haskins Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index cae35b6b9aec..572c65bcc80f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -130,6 +130,7 @@ extern struct group_info init_groups; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed = NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .run_list = LIST_HEAD_INIT(tsk.run_list), \ -- cgit v1.2.3 From fa717060f1ab7eb6570f2fb49136f838fc9195a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:27 +0100 Subject: sched: sched_rt_entity Move the task_struct members specific to rt scheduling together. A future optimization could be to put sched_entity and sched_rt_entity into a union. Signed-off-by: Peter Zijlstra CC: Srivatsa Vaddagiri Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 572c65bcc80f..ee65d87bedb7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -133,9 +133,10 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ - .run_list = LIST_HEAD_INIT(tsk.run_list), \ + .rt = { \ + .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ + .time_slice = HZ, }, \ .ioprio = 0, \ - .time_slice = HZ, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ -- cgit v1.2.3 From 6f505b16425a51270058e4a93441fe64de3dd435 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:30 +0100 Subject: sched: rt group scheduling Extend group scheduling to also cover the realtime classes. It uses the time limiting introduced by the previous patch to allow multiple realtime groups. The hard time limit is required to keep behaviour deterministic. The algorithms used make the realtime scheduler O(tg), linear scaling wrt the number of task groups. This is the worst case behaviour I can't seem to get out of, the avg. case of the algorithms can be improved, I focused on correctness and worst case. [ akpm@linux-foundation.org: move side-effects out of BUG_ON(). ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index ee65d87bedb7..796019b22b6f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -130,12 +130,13 @@ extern struct group_info init_groups; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ - .nr_cpus_allowed = NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ - .time_slice = HZ, }, \ + .time_slice = HZ, \ + .nr_cpus_allowed = NR_CPUS, \ + }, \ .ioprio = 0, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ -- cgit v1.2.3 From fd0928df98b9578be8a786ac0cb78a47a5e17a20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Jan 2008 08:52:45 +0100 Subject: ioprio: move io priority from task_struct to io_context This is where it belongs and then it doesn't take up space for a process that doesn't do IO. Signed-off-by: Jens Axboe --- include/linux/init_task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 796019b22b6f..e6b3f7080679 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -137,7 +137,6 @@ extern struct group_info init_groups; .time_slice = HZ, \ .nr_cpus_allowed = NR_CPUS, \ }, \ - .ioprio = 0, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ -- cgit v1.2.3