Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched/core.c   | 26
-rw-r--r--   kernel/sched/fair.c   | 15
2 files changed, 33 insertions, 8 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0b5c588929e9..31cb76a915a8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2259,11 +2259,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	__sched_fork(clone_flags, p);
 	/*
-	 * We mark the process as running here. This guarantees that
+	 * We mark the process as NEW here. This guarantees that
 	 * nobody will actually run it, and a signal or other external
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
-	p->state = TASK_RUNNING;
+	p->state = TASK_NEW;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
@@ -2300,6 +2300,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->sched_class = &fair_sched_class;
 	}
 
+	init_entity_runnable_average(&p->se);
+
 	/*
 	 * The child is not yet in the pid-hash so no cgroup attach races,
 	 * and the cgroup is pinned to this child due to cgroup_fork()
@@ -2446,6 +2448,7 @@ void wake_up_new_task(struct task_struct *p)
 	struct rq *rq;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	p->state = TASK_RUNNING;
 
 	walt_init_new_task_load(p);
 
@@ -8690,6 +8693,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 	struct cgroup_subsys_state *css;
+	int ret = 0;
 
 	cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -8700,8 +8704,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 		if (task->sched_class != &fair_sched_class)
 			return -EINVAL;
 #endif
+		/*
+		 * Serialize against wake_up_new_task() such that if its
+		 * running, we're sure to observe its full state.
+		 */
+		raw_spin_lock_irq(&task->pi_lock);
+		/*
+		 * Avoid calling sched_move_task() before wake_up_new_task()
+		 * has happened. This would lead to problems with PELT, due to
+		 * move wanting to detach+attach while we're not attached yet.
+		 */
+		if (task->state == TASK_NEW)
+			ret = -EINVAL;
+		raw_spin_unlock_irq(&task->pi_lock);
+
+		if (ret)
+			break;
 	}
-	return 0;
+	return ret;
 }
 
 static void cpu_cgroup_attach(struct cgroup_taskset *tset)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2afd81bfda99..76af6e25e82e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -766,7 +766,9 @@ void init_entity_runnable_average(struct sched_entity *se)
 }
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
+static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
 static void attach_entity_cfs_rq(struct sched_entity *se);
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 /*
  * With new tasks being created, their initial util_avgs are extrapolated
@@ -837,7 +839,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
 	attach_entity_cfs_rq(se);
 }
 
-#else
+#else /* !CONFIG_SMP */
 void init_entity_runnable_average(struct sched_entity *se)
 {
 }
@@ -3312,11 +3314,14 @@ void remove_entity_load_avg(struct sched_entity *se)
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 	/*
-	 * Newly created task or never used group entity should not be removed
-	 * from its (source) cfs_rq
+	 * tasks cannot exit without having gone through wake_up_new_task() ->
+	 * post_init_entity_util_avg() which will have added things to the
+	 * cfs_rq, so we can remove unconditionally.
+	 *
+	 * Similarly for groups, they will have passed through
+	 * post_init_entity_util_avg() before unregister_sched_fair_group()
+	 * calls this.
 	 */
-	if (se->avg.last_update_time == 0)
-		return;
 
 	sync_entity_load_avg(se);
 	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
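
The core of the patch is an ordering rule: sched_fork() now leaves the child in TASK_NEW, wake_up_new_task() finishes the PELT setup and only then publishes TASK_RUNNING while holding p->pi_lock, and cpu_cgroup_can_attach() checks the state under the same lock so it never tries to move a task whose load averages have not been attached yet. Below is a minimal userspace sketch of that publish-under-lock pattern, not kernel code; all names (fake_task, fake_sched_fork, fake_wake_up_new_task, fake_can_attach) are made up for illustration.

/*
 * Standalone sketch (not kernel code) of the serialization the patch
 * relies on: the fork side publishes a NEW -> RUNNING transition under
 * a lock, and the attach side checks the state under the same lock,
 * refusing to act on a task that has not been woken up yet.
 */
#include <pthread.h>
#include <stdio.h>

enum task_state { STATE_NEW, STATE_RUNNING };

struct fake_task {
	pthread_mutex_t pi_lock;	/* stands in for p->pi_lock */
	enum task_state state;		/* stands in for p->state */
	int load_attached;		/* stands in for the PELT attach state */
};

/* "sched_fork()": the task exists but must not be acted on yet. */
static void fake_sched_fork(struct fake_task *p)
{
	pthread_mutex_init(&p->pi_lock, NULL);
	p->state = STATE_NEW;
	p->load_attached = 0;
}

/* "wake_up_new_task()": finish the attachment, then publish RUNNING. */
static void fake_wake_up_new_task(struct fake_task *p)
{
	pthread_mutex_lock(&p->pi_lock);
	p->load_attached = 1;
	p->state = STATE_RUNNING;
	pthread_mutex_unlock(&p->pi_lock);
}

/* "cpu_cgroup_can_attach()": reject tasks that were never woken up. */
static int fake_can_attach(struct fake_task *p)
{
	int ret = 0;

	pthread_mutex_lock(&p->pi_lock);
	if (p->state == STATE_NEW)
		ret = -1;	/* would be -EINVAL in the kernel */
	pthread_mutex_unlock(&p->pi_lock);

	return ret;
}

int main(void)
{
	struct fake_task p;

	fake_sched_fork(&p);
	printf("attach before wakeup: %d\n", fake_can_attach(&p));	/* -1 */
	fake_wake_up_new_task(&p);
	printf("attach after wakeup:  %d\n", fake_can_attach(&p));	/* 0 */
	return 0;
}

Taking the lock on the reader side is what makes the check meaningful: a plain read of the state could race with a wakeup that is still in the middle of attaching the entity's load average, which is exactly the detach+attach problem the in-tree comment describes.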
