| author | Blagovest Kolenichev <bkolenichev@codeaurora.org> | 2017-11-06 07:02:23 -0800 |
|---|---|---|
| committer | Blagovest Kolenichev <bkolenichev@codeaurora.org> | 2017-11-06 15:58:47 -0800 |
| commit | 985aecee1dfefe1471290deffdebd2a9a2f073c9 (patch) | |
| tree | 8c55a0ea6933c9ee3e0649ccda5fbd03c013367b /kernel/sched/core.c | |
| parent | 22b18281457de02c9c830504fd4d48726db86f31 (diff) | |
| parent | ceee5bdd470586fddfbbb8c6d0287ba792525d3f (diff) | |
Merge android-4.4@ceee5bd (v4.4.95) into msm-4.4
* refs/heads/tmp-ceee5bd
BACKPORT: arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux
sched/core: fix have_sched_energy_data build warning
sched/core: Warn if ENERGY_AWARE is enabled but data is missing
sched: walt: Correct WALT window size initialization
FROMLIST: sched/fair: Use wake_q length as a hint for wake_wide
sched: WALT: account cumulative window demand
sched/fair: remove useless variable in find_best_target
sched/tune: access schedtune_initialized under CGROUP_SCHEDTUNE
sched/fair: consider task utilization in group_max_util()
sched/fair: consider task utilization in group_norm_util()
sched/fair: enforce EAS mode
sched/fair: ignore backup CPU when not valid
sched/fair: trace energy_diff for non boosted tasks
UPSTREAM: sched/fair: Sync task util before slow-path wakeup
UPSTREAM: sched/fair: Fix usage of find_idlest_group() when the local group is idlest
UPSTREAM: sched/fair: Fix usage of find_idlest_group() when no groups are allowed
BACKPORT: sched/fair: Fix find_idlest_group when local group is not allowed
UPSTREAM: sched/fair: Remove unnecessary comparison with -1
BACKPORT: sched/fair: Move select_task_rq_fair slow-path into its own function
UPSTREAM: sched/fair: Force balancing on nohz balance if local group has capacity
UPSTREAM: sched/core: Add missing update_rq_clock() call in set_user_nice()
UPSTREAM: sched/core: Add missing update_rq_clock() call for task_hot()
UPSTREAM: sched/core: Add missing update_rq_clock() in detach_task_cfs_rq()
UPSTREAM: sched/core: Add missing update_rq_clock() in post_init_entity_util_avg()
UPSTREAM: sched/core: Fix find_idlest_group() for fork
BACKPORT: sched/fair: Fix PELT integrity for new tasks
BACKPORT: sched/cgroup: Fix cpu_cgroup_fork() handling
UPSTREAM: sched/fair: Fix and optimize the fork() path
BACKPORT: sched/fair: Make it possible to account fair load avg consistently
cpufreq/sched: Consider max cpu capacity when choosing frequencies
Linux 4.4.95
FS-Cache: fix dereference of NULL user_key_payload
fscrypto: require write access to mount to set encryption policy
KEYS: Fix race between updating and finding a negative key
fscrypt: fix dereference of NULL user_key_payload
f2fs crypto: add missing locking for keyring_key access
f2fs crypto: replace some BUG_ON()'s with error checks
sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task()
parisc: Fix double-word compare and exchange in LWS code on 32-bit kernels
parisc: Avoid trashing sr2 and sr3 in LWS code
pkcs7: Prevent NULL pointer dereference, since sinfo is not always set.
KEYS: don't let add_key() update an uninstantiated key
lib/digsig: fix dereference of NULL user_key_payload
KEYS: encrypted: fix dereference of NULL user_key_payload
rtlwifi: rtl8821ae: Fix connection lost problem
clockevents/drivers/cs5535: Improve resilience to spurious interrupts
bus: mbus: fix window size calculation for 4GB windows
brcmsmac: make some local variables 'static const' to reduce stack size
i2c: ismt: Separate I2C block read from SMBus block read
ALSA: hda: Remove superfluous '-' added by printk conversion
ALSA: seq: Enable 'use' locking in all configurations
drm/nouveau/mmu: flush tlbs before deleting page tables
drm/nouveau/bsp/g92: disable by default
can: esd_usb2: Fix can_dlc value for received RTR, frames
usb: musb: Check for host-mode using is_host_active() on reset interrupt
usb: musb: sunxi: Explicitly release USB PHY on exit
can: gs_usb: fix busy loop if no more TX context is available
ALSA: usb-audio: Add native DSD support for Pro-Ject Pre Box S2 Digital
usb: hub: Allow reset retry for USB2 devices on connect bounce
usb: quirks: add quirk for WORLDE MINI MIDI keyboard
usb: cdc_acm: Add quirk for Elatec TWN3
USB: serial: metro-usb: add MS7820 device id
USB: core: fix out-of-bounds access bug in usb_get_bos_descriptor()
USB: devio: Revert "USB: devio: Don't corrupt user memory"
ANDROID: binder: show high watermark of alloc->pages.
ANDROID: binder: Add thread->process_todo flag.
UPSTREAM: arm64: compat: Remove leftover variable declaration
ANDROID: sched/fair: Select correct capacity state for energy_diff
Revert "UPSTREAM: efi/libstub/arm64: Set -fpie when building the EFI stub"
cpufreq: schedutil: clamp util to CPU maximum capacity
FROMLIST: android: binder: Fix null ptr dereference in debug msg
FROMLIST: android: binder: Change binder_shrinker to static
cpufreq/sched: Use cpu max freq rather than policy max
Conflicts:
include/linux/sched.h
kernel/sched/core.c
kernel/sched/fair.c
Change-Id: I2751f851df741f00e797deaf2119872b3dced655
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
Diffstat (limited to 'kernel/sched/core.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/sched/core.c | 184 |
1 file changed, 144 insertions, 40 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c408280ddd12..eacfd2ac56a1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -99,6 +99,10 @@ ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
 
+#ifdef CONFIG_SMP
+static bool have_sched_energy_data(void);
+#endif
+
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
@@ -201,6 +205,11 @@ static int sched_feat_set(char *cmp)
 			sysctl_sched_features &= ~(1UL << i);
 			sched_feat_disable(i);
 		} else {
+#ifdef CONFIG_SMP
+			if (i == __SCHED_FEAT_ENERGY_AWARE)
+				WARN(!have_sched_energy_data(),
+				     "Missing sched energy data\n");
+#endif
 			sysctl_sched_features |= (1UL << i);
 			sched_feat_enable(i);
 		}
@@ -554,6 +563,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
 		return;
 
+	head->count++;
+
 	get_task_struct(task);
 
 	/*
@@ -563,6 +574,10 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	head->lastp = &node->next;
 }
 
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+	       int sibling_count_hint);
+
 void wake_up_q(struct wake_q_head *head)
 {
 	struct wake_q_node *node = head->first;
@@ -577,10 +592,10 @@ void wake_up_q(struct wake_q_head *head)
 		task->wake_q.next = NULL;
 
 		/*
-		 * wake_up_process() implies a wmb() to pair with the queueing
+		 * try_to_wake_up() implies a wmb() to pair with the queueing
 		 * in wake_q_add() so as not to miss wakeups.
 		 */
-		wake_up_process(task);
+		try_to_wake_up(task, TASK_NORMAL, 0, head->count);
 		put_task_struct(task);
 	}
 }
@@ -1702,14 +1717,16 @@ out:
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
+		   int sibling_count_hint)
 {
 	bool allow_isolated = (p->flags & PF_KTHREAD);
 
 	lockdep_assert_held(&p->pi_lock);
 
 	if (p->nr_cpus_allowed > 1)
-		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
+						     sibling_count_hint);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -2007,6 +2024,8 @@ static void ttwu_queue(struct task_struct *p, int cpu)
  * @p: the thread to be awakened
  * @state: the mask of task states that can be woken
  * @wake_flags: wake modifier flags (WF_*)
+ * @sibling_count_hint: A hint at the number of threads that are being woken up
+ *                      in this event.
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -2018,7 +2037,8 @@ static void ttwu_queue(struct task_struct *p, int cpu)
  * or @state didn't match @p's state.
  */
 static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+	       int sibling_count_hint)
 {
 	unsigned long flags;
 	int cpu, src_cpu, success = 0;
@@ -2134,7 +2154,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (p->sched_class->task_waking)
 		p->sched_class->task_waking(p);
 
-	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
+			     sibling_count_hint);
 
 	/* Refresh src_cpu as it could have changed since we last read it */
 	src_cpu = task_cpu(p);
@@ -2236,7 +2257,7 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_NORMAL, 0);
+	return try_to_wake_up(p, TASK_NORMAL, 0, 1);
 }
 EXPORT_SYMBOL(wake_up_process);
 
@@ -2256,13 +2277,13 @@ EXPORT_SYMBOL(wake_up_process);
 int wake_up_process_no_notif(struct task_struct *p)
 {
 	WARN_ON(task_is_stopped_or_traced(p));
-	return try_to_wake_up(p, TASK_NORMAL, WF_NO_NOTIFIER);
+	return try_to_wake_up(p, TASK_NORMAL, WF_NO_NOTIFIER, 1);
 }
 EXPORT_SYMBOL(wake_up_process_no_notif);
 
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0);
+	return try_to_wake_up(p, state, 0, 1);
 }
 
 /*
@@ -2337,9 +2358,16 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+#ifdef CONFIG_SCHED_WALT
+	p->last_sleep_ts		= 0;
+#endif
 	INIT_LIST_HEAD(&p->se.group_node);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq			= NULL;
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
@@ -2429,11 +2457,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	__sched_fork(clone_flags, p);
 	/*
-	 * We mark the process as running here. This guarantees that
+	 * We mark the process as NEW here. This guarantees that
 	 * nobody will actually run it, and a signal or other external
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
-	p->state = TASK_RUNNING;
+	p->state = TASK_NEW;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
@@ -2470,8 +2498,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->sched_class = &fair_sched_class;
 	}
 
-	if (p->sched_class->task_fork)
-		p->sched_class->task_fork(p);
+	init_entity_runnable_average(&p->se);
 
 	/*
 	 * The child is not yet in the pid-hash so no cgroup attach races,
@@ -2481,7 +2508,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	 * Silence PROVE_RCU.
 	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	set_task_cpu(p, cpu);
+	/*
+	 * We're setting the cpu for the first time, we don't migrate,
+	 * so use __set_task_cpu().
+	 */
+	__set_task_cpu(p, cpu);
+	if (p->sched_class->task_fork)
+		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #ifdef CONFIG_SCHED_INFO
@@ -2614,6 +2647,8 @@ void wake_up_new_task(struct task_struct *p)
 	add_new_task_to_grp(p);
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	p->state = TASK_RUNNING;
+
 	/* Initialize new task's runnable average */
 	init_entity_runnable_average(&p->se);
 #ifdef CONFIG_SMP
@@ -2621,11 +2656,15 @@ void wake_up_new_task(struct task_struct *p)
 	 * Fork balancing, do it here and not earlier because:
 	 *  - cpus_allowed can change in the fork path
 	 *  - any previously selected cpu might disappear through hotplug
+	 *
+	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
+	 * as we're not fully set-up yet.
 	 */
-	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
+	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
 #endif
 	rq = __task_rq_lock(p);
 	mark_task_starting(p);
+	update_rq_clock(rq);
 	post_init_entity_util_avg(&p->se);
 	activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
 	p->on_rq = TASK_ON_RQ_QUEUED;
@@ -3071,7 +3110,7 @@ void sched_exec(void)
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	curr_cpu = task_cpu(p);
-	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
+	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
@@ -3171,7 +3210,9 @@ static void sched_freq_tick_pelt(int cpu)
 	 * utilization and to harm its performance the least, request
 	 * a jump to a higher OPP as soon as the margin of free capacity
 	 * is impacted (specified by capacity_margin).
+	 * Remember CPU utilization in sched_capacity_reqs should be normalised.
 	 */
+	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
 	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 }
 
@@ -3198,7 +3239,9 @@ static void sched_freq_tick_walt(int cpu)
 	 * It is likely that the load is growing so we
 	 * keep the added margin in our request as an
 	 * extra boost.
+	 * Remember CPU utilization in sched_capacity_reqs should be normalised.
 	 */
+	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
 	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 
 }
@@ -3579,6 +3622,10 @@ static void __sched notrace __schedule(bool preempt)
 	if (!is_idle_task(prev) && !prev->on_rq)
 		update_avg_burst(prev);
 
+#ifdef CONFIG_SCHED_WALT
+	if (!prev->on_rq)
+		prev->last_sleep_ts = wallclock;
+#endif
 	rq->nr_switches++;
 	rq->curr = next;
 	++*switch_count;
@@ -3755,7 +3802,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode, wake_flags);
+	return try_to_wake_up(curr->private, mode, wake_flags, 1);
 }
 EXPORT_SYMBOL(default_wake_function);
 
@@ -3781,6 +3828,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	BUG_ON(prio > MAX_PRIO);
 
 	rq = __task_rq_lock(p);
+	update_rq_clock(rq);
 
 	/*
 	 * Idle task boosting is a nono in general. There is one
@@ -3876,6 +3924,8 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
 	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
+
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
@@ -4303,6 +4353,7 @@ recheck:
 	 * runqueue lock must be held.
 	 */
 	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
@@ -7151,6 +7202,19 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
+static bool have_sched_energy_data(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (!rcu_dereference(per_cpu(sd_scs, cpu)) ||
+		    !rcu_dereference(per_cpu(sd_ea, cpu)))
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * Check that the per-cpu provided sd energy data is consistent for all cpus
  * within the mask.
@@ -7967,6 +8031,9 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 	}
 	rcu_read_unlock();
 
+	WARN(sched_feat(ENERGY_AWARE) && !have_sched_energy_data(),
+	     "Missing data for energy aware scheduling\n");
+
 	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);
@@ -8784,27 +8851,9 @@ void sched_offline_group(struct task_group *tg)
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-/* change task's runqueue when it moves between groups.
- *	The caller of this function should have put the task in its new group
- *	by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- *	reflect its new group.
- */
-void sched_move_task(struct task_struct *tsk)
+static void sched_change_group(struct task_struct *tsk, int type)
 {
 	struct task_group *tg;
-	int queued, running;
-	unsigned long flags;
-	struct rq *rq;
-
-	rq = task_rq_lock(tsk, &flags);
-
-	running = task_current(rq, tsk);
-	queued = task_on_rq_queued(tsk);
-
-	if (queued)
-		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
-	if (unlikely(running))
-		put_prev_task(rq, tsk);
 
 	/*
 	 * All callers are synchronized by task_rq_lock(); we do not use RCU
@@ -8817,11 +8866,37 @@ void sched_move_task(struct task_struct *tsk)
 	tsk->sched_task_group = tg;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (tsk->sched_class->task_move_group)
-		tsk->sched_class->task_move_group(tsk);
+	if (tsk->sched_class->task_change_group)
+		tsk->sched_class->task_change_group(tsk, type);
 	else
 #endif
 		set_task_rq(tsk, task_cpu(tsk));
+}
+
+/*
+ * Change task's runqueue when it moves between groups.
+ *
+ * The caller of this function should have put the task in its new group by
+ * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
+ * its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
+{
+	int queued, running;
+	unsigned long flags;
+	struct rq *rq;
+
+	rq = task_rq_lock(tsk, &flags);
+
+	running = task_current(rq, tsk);
+	queued = task_on_rq_queued(tsk);
+
+	if (queued)
+		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
+	if (unlikely(running))
+		put_prev_task(rq, tsk);
+
+	sched_change_group(tsk, TASK_MOVE_GROUP);
 
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
@@ -9258,15 +9333,28 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 	sched_free_group(tg);
 }
 
+/*
+ * This is called before wake_up_new_task(), therefore we really only
+ * have to set its group bits, all the other stuff does not apply.
+ */
 static void cpu_cgroup_fork(struct task_struct *task, void *private)
 {
-	sched_move_task(task);
+	unsigned long flags;
+	struct rq *rq;
+
+	rq = task_rq_lock(task, &flags);
+
+	update_rq_clock(rq);
+	sched_change_group(task, TASK_SET_GROUP);
+
+	task_rq_unlock(rq, task, &flags);
 }
 
 static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 	struct cgroup_subsys_state *css;
+	int ret = 0;
 
 	cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -9277,8 +9365,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 		if (task->sched_class != &fair_sched_class)
 			return -EINVAL;
 #endif
+		/*
+		 * Serialize against wake_up_new_task() such that if its
+		 * running, we're sure to observe its full state.
+		 */
+		raw_spin_lock_irq(&task->pi_lock);
+		/*
+		 * Avoid calling sched_move_task() before wake_up_new_task()
+		 * has happened. This would lead to problems with PELT, due to
+		 * move wanting to detach+attach while we're not attached yet.
+		 */
+		if (task->state == TASK_NEW)
+			ret = -EINVAL;
+		raw_spin_unlock_irq(&task->pi_lock);
+
+		if (ret)
+			break;
 	}
-	return 0;
+	return ret;
 }
 
 static void cpu_cgroup_attach(struct cgroup_taskset *tset)
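The wake_q hunks above (the `head->count++` in wake_q_add() and the `try_to_wake_up(task, TASK_NORMAL, 0, head->count)` call in wake_up_q()) are what feed the queue length into select_task_rq() as the new `sibling_count_hint`, which the "FROMLIST: sched/fair: Use wake_q length as a hint for wake_wide" change then consumes. Below is a minimal userspace sketch of that bookkeeping only — no atomics, no task_struct, and simplified type/function names that merely mirror the kernel ones — to show where the hint value comes from:

```c
/*
 * Toy model of the wake_q count bookkeeping: each wake_q_add() bumps
 * head->count, and wake_up_q() hands that count to every wakeup as a
 * sibling_count_hint. Simplified userspace sketch, not kernel code.
 */
#include <stddef.h>
#include <stdio.h>

struct task {
	const char *name;
	struct task *wake_q_next;
};

struct wake_q_head {
	struct task *first;
	struct task **lastp;
	int count;
};

static void wake_q_init(struct wake_q_head *head)
{
	head->first = NULL;
	head->lastp = &head->first;
	head->count = 0;
}

static void wake_q_add(struct wake_q_head *head, struct task *t)
{
	t->wake_q_next = NULL;
	*head->lastp = t;
	head->lastp = &t->wake_q_next;
	head->count++;		/* queue length doubles as the wakeup hint */
}

/* Stand-in for try_to_wake_up(task, TASK_NORMAL, 0, head->count). */
static void wake_one(struct task *t, int sibling_count_hint)
{
	printf("waking %s (hint=%d siblings)\n", t->name, sibling_count_hint);
}

static void wake_up_q(struct wake_q_head *head)
{
	struct task *t = head->first;

	while (t) {
		struct task *next = t->wake_q_next;

		wake_one(t, head->count);
		t = next;
	}
	wake_q_init(head);
}

int main(void)
{
	struct task a = { "a", NULL }, b = { "b", NULL }, c = { "c", NULL };
	struct wake_q_head q;

	wake_q_init(&q);
	wake_q_add(&q, &a);
	wake_q_add(&q, &b);
	wake_q_add(&q, &c);
	wake_up_q(&q);		/* each wakeup sees hint=3 */
	return 0;
}
```

Every task popped from the queue sees the same hint (the total number of siblings woken in that batch), which is the signal the wake_wide heuristic is given per the patch title.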
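The two sched_freq_tick_*() hunks rescale the raw utilization before it is handed to set_cfs_cpu_capacity(), so the value stored in sched_capacity_reqs is normalised to SCHED_CAPACITY_SCALE rather than to the CPU's own capacity_orig. A minimal sketch of that arithmetic, assuming the usual SCHED_CAPACITY_SCALE of 1024; the capacity and utilization numbers below are made-up examples, not values taken from the patch:

```c
/*
 * Sketch of the normalisation added above: raw utilisation, which is
 * bounded by the CPU's capacity_orig, is rescaled so that a fully busy
 * CPU reports SCHED_CAPACITY_SCALE (1024) regardless of its capacity.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

/* Rescale a raw utilisation value (0..capacity_orig) to 0..1024. */
static unsigned long normalise_util(unsigned long util, unsigned long capacity_orig)
{
	return util * SCHED_CAPACITY_SCALE / capacity_orig;
}

int main(void)
{
	/* a little CPU with capacity_orig 512, half busy */
	printf("%lu\n", normalise_util(256, 512));	/* -> 512 */
	/* a big CPU with capacity_orig 1024, half busy */
	printf("%lu\n", normalise_util(512, 1024));	/* -> 512 */
	return 0;
}
```

Without the rescaling, the half-busy little CPU would report 256 while the half-busy big CPU reports 512, even though both have the same relative headroom; after normalisation both report 512.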
