summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/hmp.c145
2 files changed, 85 insertions, 62 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c07d844c576e..f3b1688b3be7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2572,8 +2572,8 @@ void wake_up_new_task(struct task_struct *p)
unsigned long flags;
struct rq *rq;
- raw_spin_lock_irqsave(&p->pi_lock, flags);
add_new_task_to_grp(p);
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
/* Initialize new task's runnable average */
init_entity_runnable_average(&p->se);
#ifdef CONFIG_SMP
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 1d55e226196f..56f4c60d4d5d 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1396,16 +1396,6 @@ void reset_hmp_stats(struct hmp_sched_stats *stats, int reset_cra)
}
}
-/*
- * Invoked from three places:
- * 1) try_to_wake_up() -> ... -> select_best_cpu()
- * 2) scheduler_tick() -> ... -> migration_needed() -> select_best_cpu()
- * 3) can_migrate_task()
- *
- * Its safe to de-reference p->grp in first case (since p->pi_lock is held)
- * but not in other cases. p->grp is hence freed after a RCU grace period and
- * accessed under rcu_read_lock()
- */
int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
struct related_thread_group *grp;
@@ -3883,7 +3873,12 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
void set_preferred_cluster(struct related_thread_group *grp)
{
- raw_spin_lock(&grp->lock);
+ /*
+ * Prevent possible deadlock with update_children(). Not updating
+ * the preferred cluster once is not a big deal.
+ */
+ if (!raw_spin_trylock(&grp->lock))
+ return;
_set_preferred_cluster(grp);
raw_spin_unlock(&grp->lock);
}
@@ -3904,7 +3899,7 @@ static int alloc_group_cputime(struct related_thread_group *grp)
struct rq *rq = cpu_rq(cpu);
u64 window_start = rq->window_start;
- grp->cpu_time = alloc_percpu(struct group_cpu_time);
+ grp->cpu_time = alloc_percpu_gfp(struct group_cpu_time, GFP_ATOMIC);
if (!grp->cpu_time)
return -ENOMEM;
@@ -4088,7 +4083,7 @@ struct related_thread_group *alloc_related_thread_group(int group_id)
{
struct related_thread_group *grp;
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
if (!grp)
return ERR_PTR(-ENOMEM);
@@ -4127,19 +4122,64 @@ static void free_related_thread_group(struct rcu_head *rcu)
kfree(grp);
}
+/*
+ * The thread group for a task can change while we are here. However,
+ * add_new_task_to_grp() will take care of any tasks that we miss here.
+ * When a parent exits, and a child thread is simultaneously exiting,
+ * sched_set_group_id() will synchronize those operations.
+ */
+static void update_children(struct task_struct *leader,
+ struct related_thread_group *grp, int event)
+{
+ struct task_struct *child;
+ struct rq *rq;
+ unsigned long flags;
+
+ if (!thread_group_leader(leader))
+ return;
+
+ if (event == ADD_TASK && !sysctl_sched_enable_thread_grouping)
+ return;
+
+ if (thread_group_empty(leader))
+ return;
+
+ child = next_thread(leader);
+
+ do {
+ rq = task_rq_lock(child, &flags);
+
+ if (event == REM_TASK && child->grp && grp == child->grp) {
+ transfer_busy_time(rq, grp, child, event);
+ list_del_init(&child->grp_list);
+ rcu_assign_pointer(child->grp, NULL);
+ } else if (event == ADD_TASK && !child->grp) {
+ transfer_busy_time(rq, grp, child, event);
+ list_add(&child->grp_list, &grp->tasks);
+ rcu_assign_pointer(child->grp, grp);
+ }
+
+ task_rq_unlock(rq, child, &flags);
+ } while_each_thread(leader, child);
+
+}
+
static void remove_task_from_group(struct task_struct *p)
{
struct related_thread_group *grp = p->grp;
struct rq *rq;
int empty_group = 1;
+ unsigned long flags;
raw_spin_lock(&grp->lock);
- rq = __task_rq_lock(p);
+ rq = task_rq_lock(p, &flags);
transfer_busy_time(rq, p->grp, p, REM_TASK);
list_del_init(&p->grp_list);
rcu_assign_pointer(p->grp, NULL);
- __task_rq_unlock(rq);
+ task_rq_unlock(rq, p, &flags);
+
+ update_children(p, grp, REM_TASK);
if (!list_empty(&grp->tasks)) {
empty_group = 0;
@@ -4158,6 +4198,7 @@ static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
struct rq *rq;
+ unsigned long flags;
raw_spin_lock(&grp->lock);
@@ -4165,11 +4206,13 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
* Change p->grp under rq->lock. Will prevent races with read-side
* reference of p->grp in various hot-paths
*/
- rq = __task_rq_lock(p);
+ rq = task_rq_lock(p, &flags);
transfer_busy_time(rq, grp, p, ADD_TASK);
list_add(&p->grp_list, &grp->tasks);
rcu_assign_pointer(p->grp, grp);
- __task_rq_unlock(rq);
+ task_rq_unlock(rq, p, &flags);
+
+ update_children(p, grp, ADD_TASK);
_set_preferred_cluster(grp);
@@ -4192,82 +4235,62 @@ void add_new_task_to_grp(struct task_struct *new)
parent = new->group_leader;
- /*
- * The parent's pi_lock is required here to protect race
- * against the parent task being removed from the
- * group.
- */
- raw_spin_lock_irqsave(&parent->pi_lock, flags);
+ write_lock_irqsave(&related_thread_group_lock, flags);
- /* protected by pi_lock. */
+ rcu_read_lock();
grp = task_related_thread_group(parent);
- if (!grp) {
- raw_spin_unlock_irqrestore(&parent->pi_lock, flags);
+ rcu_read_unlock();
+
+ /* Its possible that update_children() already added us to the group */
+ if (!grp || new->grp) {
+ write_unlock_irqrestore(&related_thread_group_lock, flags);
return;
}
+
raw_spin_lock(&grp->lock);
rcu_assign_pointer(new->grp, grp);
list_add(&new->grp_list, &grp->tasks);
raw_spin_unlock(&grp->lock);
- raw_spin_unlock_irqrestore(&parent->pi_lock, flags);
+ write_unlock_irqrestore(&related_thread_group_lock, flags);
}
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
- int rc = 0, destroy = 0;
+ int rc = 0;
unsigned long flags;
- struct related_thread_group *grp = NULL, *new = NULL;
+ struct related_thread_group *grp = NULL;
-redo:
- raw_spin_lock_irqsave(&p->pi_lock, flags);
+ /* Prevents tasks from exiting while we are managing groups. */
+ write_lock_irqsave(&related_thread_group_lock, flags);
+ /* Switching from one group to another directly is not permitted */
if ((current != p && p->flags & PF_EXITING) ||
(!p->grp && !group_id) ||
- (p->grp && p->grp->id == group_id))
+ (p->grp && group_id))
goto done;
- write_lock(&related_thread_group_lock);
-
if (!group_id) {
remove_task_from_group(p);
- write_unlock(&related_thread_group_lock);
goto done;
}
- if (p->grp && p->grp->id != group_id)
- remove_task_from_group(p);
-
grp = lookup_related_thread_group(group_id);
- if (!grp && !new) {
- /* New group */
- write_unlock(&related_thread_group_lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- new = alloc_related_thread_group(group_id);
- if (IS_ERR(new))
- return -ENOMEM;
- destroy = 1;
- /* Rerun checks (like task exiting), since we dropped pi_lock */
- goto redo;
- } else if (!grp && new) {
- /* New group - use object allocated before */
- destroy = 0;
- list_add(&new->list, &related_thread_groups);
- grp = new;
+ if (!grp) {
+ grp = alloc_related_thread_group(group_id);
+ if (IS_ERR(grp)) {
+ rc = -ENOMEM;
+ goto done;
+ }
+
+ list_add(&grp->list, &related_thread_groups);
}
BUG_ON(!grp);
rc = add_task_to_group(p, grp);
- write_unlock(&related_thread_group_lock);
done:
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-
- if (new && destroy) {
- free_group_cputime(new);
- kfree(new);
- }
-
+ write_unlock_irqrestore(&related_thread_group_lock, flags);
return rc;
}