From 363ab6f1424cdea63e5d182312d60e19077b892a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: core: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched_rt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 060e87b0cb1c..d73386c6e361 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -231,7 +231,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) return 1; span = sched_rt_period_mask(); - for_each_cpu_mask(i, span) { + for_each_cpu_mask_nr(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -272,7 +272,7 @@ static int balance_runtime(struct rt_rq *rt_rq) spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu_mask(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -1000,7 +1000,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; -- cgit v1.2.3 From 7ebefa8ceefed44cc321be70afc54a585a68ac0b Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Tue, 1 Jul 2008 23:32:15 +0200 Subject: sched: rework of "prioritize non-migratable tasks over migratable ones" (1) handle in a generic way all cases when a newly woken-up task is not migratable (not just a corner case when "rt_se->nr_cpus_allowed == 1") (2) if current is to be preempted, then make sure "p" will be picked up by pick_next_task_rt(). i.e. 
move task's group at the head of its list as well. currently, it's not a case for the group-scheduling case as described here: http://www.ussg.iu.edu/hypermail/linux/kernel/0807.0/0134.html Signed-off-by: Dmitry Adamushko Cc: Steven Rostedt Cc: Gregory Haskins Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 68 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 47ceac9e8552..d3d1cccb3d7b 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -599,11 +599,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - if (rt_se->nr_cpus_allowed == 1) - list_add(&rt_se->run_list, queue); - else - list_add_tail(&rt_se->run_list, queue); - + list_add_tail(&rt_se->run_list, queue); __set_bit(rt_se_prio(rt_se), array->bitmap); inc_rt_tasks(rt_se, rt_rq); @@ -688,32 +684,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. 
*/ -static -void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) +static void +requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) { - struct rt_prio_array *array = &rt_rq->active; - if (on_rt_rq(rt_se)) { - list_del_init(&rt_se->run_list); - list_add_tail(&rt_se->run_list, - array->queue + rt_se_prio(rt_se)); + struct rt_prio_array *array = &rt_rq->active; + struct list_head *queue = array->queue + rt_se_prio(rt_se); + + if (head) + list_move(&rt_se->run_list, queue); + else + list_move_tail(&rt_se->run_list, queue); } } -static void requeue_task_rt(struct rq *rq, struct task_struct *p) +static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) { struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq; for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); - requeue_rt_entity(rt_rq, rt_se); + requeue_rt_entity(rt_rq, rt_se, head); } } static void yield_task_rt(struct rq *rq) { - requeue_task_rt(rq, rq->curr); + requeue_task_rt(rq, rq->curr, 0); } #ifdef CONFIG_SMP @@ -753,6 +751,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync) */ return task_cpu(p); } + +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) +{ + cpumask_t mask; + + if (rq->curr->rt.nr_cpus_allowed == 1) + return; + + if (p->rt.nr_cpus_allowed != 1 + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; + + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; + + /* + * There appears to be other cpus that can accept + * current and none to run 'p', so lets reschedule + * to try and push current away: + */ + requeue_task_rt(rq, p, 1); + resched_task(rq->curr); +} + #endif /* CONFIG_SMP */ /* @@ -778,18 +800,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) * to move current somewhere else, making room for our non-migratable * task. 
*/ - if((p->prio == rq->curr->prio) - && p->rt.nr_cpus_allowed == 1 - && rq->curr->rt.nr_cpus_allowed != 1) { - cpumask_t mask; - - if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) - /* - * There appears to be other cpus that can accept - * current, so lets reschedule to try and push it away - */ - resched_task(rq->curr); - } + if (p->prio == rq->curr->prio && !need_resched()) + check_preempt_equal_prio(rq, p); #endif } @@ -1415,7 +1427,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) * on the queue: */ if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p); + requeue_task_rt(rq, p, 0); set_tsk_need_resched(p); } } -- cgit v1.2.3 From e761b7725234276a802322549cee5255305a0930 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 15 Jul 2008 04:43:49 -0700 Subject: cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment (take 2) This is based on Linus' idea of creating cpu_active_map that prevents scheduler load balancer from migrating tasks to the cpu that is going down. It allows us to simplify domain management code and avoid unnecessary domain rebuilds during cpu hotplug event handling. Please ignore the cpusets part for now. It needs some more work in order to avoid crazy lock nesting. Although I did simplify and unify domain reinitialization logic. We now simply call partition_sched_domains() in all the cases. This means that we're using the exact same code paths as in the cpusets case and hence the tests below cover cpusets too. Cpuset changes to make rebuild_sched_domains() callable from various contexts are in the separate patch (right after this one). This not only boots but also easily handles while true; do make clean; make -j 8; done and while true; do on-off-cpu 1; done at the same time. (on-off-cpu 1 simply does echo 0/1 > /sys/.../cpu1/online thing). Surprisingly the box (dual-core Core2) is quite usable. 
In fact I'm typing this on it right now in gnome-terminal and things are moving just fine. Also this is running with most of the debug features enabled (lockdep, mutex, etc.) with no BUG_ONs or lockdep complaints so far. I believe I addressed all of Dmitry's comments on Linus' original version. I changed both the fair and rt balancers to mask out non-active cpus. And replaced cpu_is_offline() with !cpu_active() in the main scheduler code where it made sense (to me). Signed-off-by: Max Krasnyanskiy Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Acked-by: Gregory Haskins Cc: dmitry.adamushko@gmail.com Cc: pj@sgi.com Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d3d1cccb3d7b..50735bb96149 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -933,6 +933,13 @@ static int find_lowest_rq(struct task_struct *task) if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) return -1; /* No targets found */ + /* + * Only consider CPUs that are usable for migration. + * I guess we might want to change cpupri_find() to ignore those + * in the first place. + */ + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); + /* * At this point we have built a mask of cpus representing the * lowest priority tasks in the system. Now we want to elect -- cgit v1.2.3 From 577b4a58d2e74a4d48050eeea3e3f952ce04eb86 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Jul 2008 13:34:54 +0100 Subject: sched: fix warning in inc_rt_tasks() to not declare variable 'rq' if it's not needed Fix inc_rt_tasks() to not declare variable 'rq' if it's not needed. It is declared if CONFIG_SMP or CONFIG_RT_GROUP_SCHED, but only used if CONFIG_SMP. This is a consequence of patch 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 plus patch 1100ac91b6af02d8639d518fad5b434b1bf44ed6. 
Signed-off-by: David Howells Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 47ceac9e8552..147004c651c0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -505,7 +505,9 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq->rt_nr_running++; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_se_prio(rt_se) < rt_rq->highest_prio) { +#ifdef CONFIG_SMP struct rq *rq = rq_of_rt_rq(rt_rq); +#endif rt_rq->highest_prio = rt_se_prio(rt_se); #ifdef CONFIG_SMP -- cgit v1.2.3 From 58838cf3ca3337d76141c33d6c68376490263468 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jul 2008 12:43:13 +0200 Subject: sched: clean up compiler warning Reported-by: Daniel Walker Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 147004c651c0..93ac8ee08271 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -253,7 +253,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq) diff = iter->rt_runtime - iter->rt_time; if (diff > 0) { - do_div(diff, weight); + diff = div_u64((u64)diff, weight); if (rt_rq->rt_runtime + diff > rt_period) diff = rt_period - rt_rq->rt_runtime; iter->rt_runtime -= diff; -- cgit v1.2.3 From 1b12bbc747560ea68bcc132c3d05699e52271da0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Aug 2008 09:30:22 +0200 Subject: lockdep: re-annotate scheduler runqueues Instead of using a per-rq lock class, use the regular nesting operations. However, take extra care with double_lock_balance() as it can release the already held rq->lock (and therefore change its nesting class). 
So what can happen is: spin_lock(rq->lock); // this rq subclass 0 double_lock_balance(rq, other_rq); // release rq // acquire other_rq->lock subclass 0 // acquire rq->lock subclass 1 spin_unlock(other_rq->lock); leaving you with rq->lock in subclass 1 So a subsequent double_lock_balance() call can try to nest a subclass 1 lock while already holding a subclass 1 lock. Fix this by introducing double_unlock_balance() which releases the other rq's lock, but also re-sets the subclass for this rq's lock to 0. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/sched_rt.c') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 908c04f9dad0..6163e4cf885b 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -861,6 +861,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) #define RT_MAX_TRIES 3 static int double_lock_balance(struct rq *this_rq, struct rq *busiest); +static void double_unlock_balance(struct rq *this_rq, struct rq *busiest); + static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) @@ -1022,7 +1024,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) break; /* try again */ - spin_unlock(&lowest_rq->lock); + double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; } @@ -1091,7 +1093,7 @@ static int push_rt_task(struct rq *rq) resched_task(lowest_rq->curr); - spin_unlock(&lowest_rq->lock); + double_unlock_balance(rq, lowest_rq); ret = 1; out: @@ -1197,7 +1199,7 @@ static int pull_rt_task(struct rq *this_rq) } skip: - spin_unlock(&src_rq->lock); + double_unlock_balance(this_rq, src_rq); } return ret; -- cgit v1.2.3