diff options
author | Srinivasarao P <spathi@codeaurora.org> | 2017-12-18 14:14:52 +0530 |
---|---|---|
committer | Srinivasarao P <spathi@codeaurora.org> | 2017-12-18 14:23:14 +0530 |
commit | bb6e8073112e0a7dd53e2cbd166b20f585a5bb79 (patch) | |
tree | 792232b5cb98372221f93dcb1d8182dd8b79119b /kernel/sched | |
parent | 02a73d4553b036e9dd44b02a7ca874dc0555f86b (diff) | |
parent | 46d256da87ef1132f02d234b885110d96c6889eb (diff) |
Merge android-4.4.97 (46d256d) into msm-4.4
* refs/heads/tmp-46d256d
Linux 4.4.97
staging: r8712u: Fix Sparse warning in rtl871x_xmit.c
xen: don't print error message in case of missing Xenstore entry
bt8xx: fix memory leak
s390/dasd: check for device error pointer within state change interrupts
mei: return error on notification request to a disconnected client
exynos4-is: fimc-is: Unmap region obtained by of_iomap()
staging: lustre: ptlrpc: skip lock if export failed
staging: lustre: hsm: stack overrun in hai_dump_data_field
staging: lustre: llite: don't invoke direct_IO for the EOF case
platform/x86: intel_mid_thermal: Fix module autoload
scsi: aacraid: Process Error for response I/O
xen/manage: correct return value check on xenbus_scanf()
cx231xx: Fix I2C on Internal Master 3 Bus
perf tools: Only increase index if perf_evsel__new_idx() succeeds
drm/amdgpu: when dpm disabled, also need to stop/start vce.
i2c: riic: correctly finish transfers
ext4: do not use stripe_width if it is not set
ext4: fix stripe-unaligned allocations
staging: rtl8712u: Fix endian settings for structs describing network packets
mfd: axp20x: Fix axp288 PEK_DBR and PEK_DBF irqs being swapped
mfd: ab8500-sysctrl: Handle probe deferral
ARM: pxa: Don't rely on public mmc header to include leds.h
mmc: s3cmci: include linux/interrupt.h for tasklet_struct
PM / wakeirq: report a wakeup_event on dedicated wekup irq
Fix tracing sample code warning.
tracing/samples: Fix creation and deletion of simple_thread_fn creation
drm/msm: fix an integer overflow test
drm/msm: Fix potential buffer overflow issue
perf tools: Fix build failure on perl script context
ocfs2: fstrim: Fix start offset of first cluster group during fstrim
ARM: 8715/1: add a private asm/unaligned.h
ARM: dts: mvebu: pl310-cache disable double-linefill
arm64: ensure __dump_instr() checks addr_limit
ASoC: adau17x1: Workaround for noise bug in ADC
KEYS: fix out-of-bounds read during ASN.1 parsing
KEYS: return full count in keyring_read() if buffer is too small
cifs: check MaxPathNameComponentLength != 0 before using it
ALSA: seq: Fix nested rwsem annotation for lockdep splat
ALSA: timer: Add missing mutex lock for compat ioctls
BACKPORT: xfrm: Clear sk_dst_cache when applying per-socket policy.
Revert "ANDROID: sched/rt: schedtune: Add boost retention to RT"
cpufreq: Drop schedfreq governor
ANDROID: sched/rt: schedtune: Add boost retention to RT
ANDROID: sched/rt: add schedtune accounting
ANDROID: Revert "arm64: move ELF_ET_DYN_BASE to 4GB / 4MB"
ANDROID: Revert "arm: move ELF_ET_DYN_BASE to 4MB"
sched: EAS: Fix the calculation of group util in group_idle_state()
sched: EAS: update trg_cpu to backup_cpu if no energy saving for target_cpu
sched: EAS: Fix the condition to distinguish energy before/after
Conflicts:
drivers/cpufreq/Kconfig
drivers/gpu/drm/msm/msm_gem_submit.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h
Change-Id: I0d8c5287cb67fd47c8944a002c0ca71adcdef537
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/Makefile | 1 | ||||
-rw-r--r-- | kernel/sched/core.c | 86 | ||||
-rw-r--r-- | kernel/sched/cpufreq_sched.c | 525 | ||||
-rw-r--r-- | kernel/sched/fair.c | 113 | ||||
-rw-r--r-- | kernel/sched/rt.c | 54 | ||||
-rw-r--r-- | kernel/sched/sched.h | 58 |
6 files changed, 19 insertions, 818 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index a353df46c8e4..64464b44a265 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -23,5 +23,4 @@ obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9dd9640bfe82..1d91c012b5d8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3167,91 +3167,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_CPU_FREQ_GOV_SCHED - -static inline -unsigned long add_capacity_margin(unsigned long cpu_capacity) -{ - cpu_capacity = cpu_capacity * capacity_margin; - cpu_capacity /= SCHED_CAPACITY_SCALE; - return cpu_capacity; -} - -static inline -unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) -{ - unsigned long total = add_capacity_margin(cfs_cap + scr->rt); - return total += scr->dl; -} - -unsigned long boosted_cpu_util(int cpu); -static void sched_freq_tick_pelt(int cpu) -{ - unsigned long cpu_utilization = boosted_cpu_util(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) - return; - - /* - * To make free room for a task that is building up its "real" - * utilization and to harm its performance the least, request - * a jump to a higher OPP as soon as the margin of free capacity - * is impacted (specified by capacity_margin). - * Remember CPU utilization in sched_capacity_reqs should be normalised. - */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); -} - -#ifdef CONFIG_SCHED_WALT -static void sched_freq_tick_walt(int cpu) -{ - unsigned long cpu_utilization = cpu_util_freq(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - - if (walt_disabled || !sysctl_sched_use_walt_cpu_util) - return sched_freq_tick_pelt(cpu); - - /* - * Add a margin to the WALT utilization to check if we will need to - * increase frequency. - * NOTE: WALT tracks a single CPU signal for all the scheduling - * classes, thus this margin is going to be added to the DL class as - * well, which is something we do not do in sched_freq_tick_pelt case. - */ - if (add_capacity_margin(cpu_utilization) <= capacity_curr) - return; - - /* - * It is likely that the load is growing so we - * keep the added margin in our request as an - * extra boost. - * Remember CPU utilization in sched_capacity_reqs should be normalised. - */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); - -} -#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) -#else -#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) -#endif /* CONFIG_SCHED_WALT */ - -static void sched_freq_tick(int cpu) -{ - if (!sched_freq()) - return; - - _sched_freq_tick(cpu); -} -#else -static inline void sched_freq_tick(int cpu) { } -#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3278,7 +3193,6 @@ void scheduler_tick(void) wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); early_notif = early_detection_notify(rq, wallclock); - sched_freq_tick(cpu); raw_spin_unlock(&rq->lock); if (early_notif) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c deleted file mode 100644 index ec0aed7a8f96..000000000000 --- a/kernel/sched/cpufreq_sched.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/cpufreq.h> -#include <linux/module.h> -#include <linux/kthread.h> -#include <linux/percpu.h> -#include <linux/irq_work.h> -#include <linux/delay.h> -#include <linux/string.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/cpufreq_sched.h> - -#include "sched.h" - -#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ -#define THROTTLE_UP_NSEC 500000 /* 500us default */ - -struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; -static bool __read_mostly cpufreq_driver_slow; - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static struct cpufreq_governor cpufreq_gov_sched; -#endif - -static DEFINE_PER_CPU(unsigned long, enabled); -DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); - -struct gov_tunables { - struct gov_attr_set attr_set; - unsigned int up_throttle_nsec; - unsigned int down_throttle_nsec; -}; - -/** - * gov_data - per-policy data internal to the governor - * @up_throttle: next throttling period expiry if increasing OPP - * @down_throttle: next throttling period expiry if decreasing OPP - * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP - * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP - * @task: worker thread for dvfs transition that may block/sleep - * @irq_work: callback used to wake up worker thread - * @requested_freq: last frequency requested by the sched governor - * - * struct gov_data is the per-policy cpufreq_sched-specific data structure. A - * per-policy instance of it is created when the cpufreq_sched governor receives - * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data - * member of struct cpufreq_policy. - * - * Readers of this data must call down_read(policy->rwsem). Writers must - * call down_write(policy->rwsem). - */ -struct gov_data { - ktime_t up_throttle; - ktime_t down_throttle; - struct gov_tunables *tunables; - struct list_head tunables_hook; - struct task_struct *task; - struct irq_work irq_work; - unsigned int requested_freq; -}; - -static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, - unsigned int freq) -{ - struct gov_data *gd = policy->governor_data; - - /* avoid race with cpufreq_sched_stop */ - if (!down_write_trylock(&policy->rwsem)) - return; - - __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); - - gd->up_throttle = ktime_add_ns(ktime_get(), - gd->tunables->up_throttle_nsec); - gd->down_throttle = ktime_add_ns(ktime_get(), - gd->tunables->down_throttle_nsec); - up_write(&policy->rwsem); -} - -static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) -{ - ktime_t now = ktime_get(); - - ktime_t throttle = gd->requested_freq < cur_freq ? - gd->down_throttle : gd->up_throttle; - - if (ktime_after(now, throttle)) - return false; - - while (1) { - int usec_left = ktime_to_ns(ktime_sub(throttle, now)); - - usec_left /= NSEC_PER_USEC; - trace_cpufreq_sched_throttled(usec_left); - usleep_range(usec_left, usec_left + 100); - now = ktime_get(); - if (ktime_after(now, throttle)) - return true; - } -} - -/* - * we pass in struct cpufreq_policy. This is safe because changing out the - * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), - * which tears down all of the data structures and __cpufreq_governor(policy, - * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the - * new policy pointer - */ -static int cpufreq_sched_thread(void *data) -{ - struct sched_param param; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned int new_request = 0; - unsigned int last_request = 0; - int ret; - - policy = (struct cpufreq_policy *) data; - gd = policy->governor_data; - - param.sched_priority = 50; - ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); - if (ret) { - pr_warn("%s: failed to set SCHED_FIFO\n", __func__); - do_exit(-EINVAL); - } else { - pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", - __func__, gd->task->pid); - } - - do { - new_request = gd->requested_freq; - if (new_request == last_request) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } else { - /* - * if the frequency thread sleeps while waiting to be - * unthrottled, start over to check for a newer request - */ - if (finish_last_request(gd, policy->cur)) - continue; - last_request = new_request; - cpufreq_sched_try_driver_target(policy, new_request); - } - } while (!kthread_should_stop()); - - return 0; -} - -static void cpufreq_sched_irq_work(struct irq_work *irq_work) -{ - struct gov_data *gd; - - gd = container_of(irq_work, struct gov_data, irq_work); - if (!gd) - return; - - wake_up_process(gd->task); -} - -static void update_fdomain_capacity_request(int cpu) -{ - unsigned int freq_new, index_new, cpu_tmp; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned long capacity = 0; - - /* - * Avoid grabbing the policy if possible. A test is still - * required after locking the CPU's policy to avoid racing - * with the governor changing. - */ - if (!per_cpu(enabled, cpu)) - return; - - policy = cpufreq_cpu_get(cpu); - if (IS_ERR_OR_NULL(policy)) - return; - - if (policy->governor != &cpufreq_gov_sched || - !policy->governor_data) - goto out; - - gd = policy->governor_data; - - /* find max capacity requested by cpus in this policy */ - for_each_cpu(cpu_tmp, policy->cpus) { - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); - capacity = max(capacity, scr->total); - } - - /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT; - if (cpufreq_frequency_table_target(policy, policy->freq_table, - freq_new, CPUFREQ_RELATION_L, - &index_new)) - goto out; - freq_new = policy->freq_table[index_new].frequency; - - if (freq_new > policy->max) - freq_new = policy->max; - - if (freq_new < policy->min) - freq_new = policy->min; - - trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, - gd->requested_freq); - if (freq_new == gd->requested_freq) - goto out; - - gd->requested_freq = freq_new; - - /* - * Throttling is not yet supported on platforms with fast cpufreq - * drivers. - */ - if (cpufreq_driver_slow) - irq_work_queue_on(&gd->irq_work, cpu); - else - cpufreq_sched_try_driver_target(policy, freq_new); - -out: - cpufreq_cpu_put(policy); -} - -#ifdef CONFIG_SCHED_WALT -static inline unsigned long -requested_capacity(struct sched_capacity_reqs *scr) -{ - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) - return scr->cfs; - return scr->cfs + scr->rt; -} -#else -#define requested_capacity(scr) (scr->cfs + scr->rt) -#endif - -void update_cpu_capacity_request(int cpu, bool request) -{ - unsigned long new_capacity; - struct sched_capacity_reqs *scr; - - /* The rq lock serializes access to the CPU's sched_capacity_reqs. */ - lockdep_assert_held(&cpu_rq(cpu)->lock); - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - - new_capacity = requested_capacity(scr); - new_capacity = new_capacity * capacity_margin - / SCHED_CAPACITY_SCALE; - new_capacity += scr->dl; - - if (new_capacity == scr->total) - return; - - trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); - - scr->total = new_capacity; - if (request) - update_fdomain_capacity_request(cpu); -} - -static inline void set_sched_freq(void) -{ - static_key_slow_inc(&__sched_freq); -} - -static inline void clear_sched_freq(void) -{ - static_key_slow_dec(&__sched_freq); -} - -/* Tunables */ -static struct gov_tunables *global_tunables; - -static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct gov_tunables, attr_set); -} - -static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->up_throttle_nsec); -} - -static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->up_throttle_nsec = val; - return count; -} - -static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->down_throttle_nsec); -} - -static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->down_throttle_nsec = val; - return count; -} - -static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec); -static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec); - -static struct attribute *schedfreq_attributes[] = { - &up_throttle_nsec.attr, - &down_throttle_nsec.attr, - NULL -}; - -static struct kobj_type tunables_ktype = { - .default_attrs = schedfreq_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) -{ - struct gov_data *gd; - int cpu; - int rc; - - for_each_cpu(cpu, policy->cpus) - memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, - sizeof(struct sched_capacity_reqs)); - - gd = kzalloc(sizeof(*gd), GFP_KERNEL); - if (!gd) - return -ENOMEM; - - policy->governor_data = gd; - - if (!global_tunables) { - gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL); - if (!gd->tunables) - goto free_gd; - - gd->tunables->up_throttle_nsec = - policy->cpuinfo.transition_latency ? - policy->cpuinfo.transition_latency : - THROTTLE_UP_NSEC; - gd->tunables->down_throttle_nsec = - THROTTLE_DOWN_NSEC; - - rc = kobject_init_and_add(&gd->tunables->attr_set.kobj, - &tunables_ktype, - get_governor_parent_kobj(policy), - "%s", cpufreq_gov_sched.name); - if (rc) - goto free_tunables; - - gov_attr_set_init(&gd->tunables->attr_set, - &gd->tunables_hook); - - pr_debug("%s: throttle_threshold = %u [ns]\n", - __func__, gd->tunables->up_throttle_nsec); - - if (!have_governor_per_policy()) - global_tunables = gd->tunables; - } else { - gd->tunables = global_tunables; - gov_attr_set_get(&global_tunables->attr_set, - &gd->tunables_hook); - } - - policy->governor_data = gd; - if (cpufreq_driver_is_slow()) { - cpufreq_driver_slow = true; - gd->task = kthread_create(cpufreq_sched_thread, policy, - "kschedfreq:%d", - cpumask_first(policy->related_cpus)); - if (IS_ERR_OR_NULL(gd->task)) { - pr_err("%s: failed to create kschedfreq thread\n", - __func__); - goto free_tunables; - } - get_task_struct(gd->task); - kthread_bind_mask(gd->task, policy->related_cpus); - wake_up_process(gd->task); - init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); - } - - set_sched_freq(); - - return 0; - -free_tunables: - kfree(gd->tunables); -free_gd: - policy->governor_data = NULL; - kfree(gd); - return -ENOMEM; -} - -static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy) -{ - unsigned int count; - struct gov_data *gd = policy->governor_data; - - clear_sched_freq(); - if (cpufreq_driver_slow) { - kthread_stop(gd->task); - put_task_struct(gd->task); - } - - count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook); - if (!count) { - if (!have_governor_per_policy()) - global_tunables = NULL; - kfree(gd->tunables); - } - - policy->governor_data = NULL; - - kfree(gd); - return 0; -} - -static int cpufreq_sched_start(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 1; - - return 0; -} - -static void cpufreq_sched_limits(struct cpufreq_policy *policy) -{ - unsigned int clamp_freq; - struct gov_data *gd = policy->governor_data;; - - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", - policy->cpu, policy->min, policy->max, - policy->cur); - - clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); - - if (policy->cur != clamp_freq) - __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); -} - -static int cpufreq_sched_stop(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 0; - - return 0; -} - -static int cpufreq_sched_setup(struct cpufreq_policy *policy, - unsigned int event) -{ - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - return cpufreq_sched_policy_init(policy); - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_sched_policy_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_sched_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_sched_stop(policy); - case CPUFREQ_GOV_LIMITS: - cpufreq_sched_limits(policy); - break; - } - return 0; -} - - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static -#endif -struct cpufreq_governor cpufreq_gov_sched = { - .name = "sched", - .governor = cpufreq_sched_setup, - .owner = THIS_MODULE, -}; - -static int __init cpufreq_sched_init(void) -{ - int cpu; - - for_each_cpu(cpu, cpu_possible_mask) - per_cpu(enabled, cpu) = 0; - return cpufreq_register_governor(&cpufreq_gov_sched); -} - -/* Try to make this the default governor */ -fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b3a8411bac2b..e515311aa93c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -53,7 +53,6 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; unsigned int sysctl_sched_sync_hint_enable = 1; -unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; /* @@ -746,9 +745,7 @@ void init_entity_runnable_average(struct sched_entity *se) sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* * In previous Android versions, we used to have: - * sa->util_avg = sched_freq() ? - * sysctl_sched_initial_task_util : - * scale_load_down(SCHED_LOAD_SCALE); + * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); * sa->util_sum = sa->util_avg * LOAD_AVG_MAX; * However, that functionality has been moved to enqueue. * It is unclear if we should restore this in enqueue. @@ -5759,23 +5756,6 @@ unsigned long boosted_cpu_util(int cpu); #define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif -#if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED) -static void update_capacity_of(int cpu) -{ - unsigned long req_cap; - - if (!sched_freq()) - return; - - /* Normalize scale-invariant capacity to cpu. */ - req_cap = boosted_cpu_util(cpu); - req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, req_cap); -} -#else -#define update_capacity_of(X) do {} while(0) -#endif /* SMP and CPU_FREQ_GOV_SCHED */ - /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -5788,7 +5768,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; - int task_wakeup = flags & ENQUEUE_WAKEUP; #endif /* @@ -5863,19 +5842,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) rq->rd->overutilized = true; trace_sched_overutilized(true); } - - } - - if (!se) { - /* - * We want to potentially trigger a freq switch - * request only for tasks that are waking up; this is - * because we get here also during load balancing, but - * in these cases it seems wise to trigger as single - * request after load balancing is done. - */ - if (task_new || task_wakeup) - update_capacity_of(cpu_of(rq)); } #endif /* CONFIG_SMP */ @@ -5953,23 +5919,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) */ schedtune_dequeue_task(p, cpu_of(rq)); - if (!se) { - /* - * We want to potentially trigger a freq switch - * request only for tasks that are going to sleep; - * this is because we get here also during load - * balancing, but in these cases it seems wise to - * trigger as single request after load balancing is - * done. - */ - if (task_sleep) { - if (rq->cfs.nr_running) - update_capacity_of(cpu_of(rq)); - else if (sched_freq()) - set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */ - } - } - #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -6521,13 +6470,6 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; - /* - * Try to estimate if a deeper idle state is - * achievable when we move the task. - */ - for_each_cpu(i, sched_group_cpus(sg)) - grp_util += cpu_util(i); - src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); if (src_in_grp == dst_in_grp) { @@ -6536,10 +6478,16 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) */ goto end; } - /* add or remove util as appropriate to indicate what group util - * will be (worst case - no concurrent execution) after moving the task + + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. */ - grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + for_each_cpu(i, sched_group_cpus(sg)) { + grp_util += cpu_util_wake(i, eenv->task); + if (unlikely(i == eenv->trg_cpu)) + grp_util += eenv->util_delta; + } if (grp_util <= ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { @@ -6626,13 +6574,13 @@ static int sched_group_energy(struct energy_env *eenv) if (sg->group_weight == 1) { /* Remove capacity of src CPU (before task move) */ - if (eenv->util_delta == 0 && + if (eenv->trg_cpu == eenv->src_cpu && cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { eenv->cap.before = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta -= eenv->cap.before; } /* Add capacity of dst CPU (after task move) */ - if (eenv->util_delta != 0 && + if (eenv->trg_cpu == eenv->dst_cpu && cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { eenv->cap.after = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta += eenv->cap.after; @@ -7813,6 +7761,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync /* No energy saving for target_cpu, try backup */ target_cpu = tmp_backup; eenv.dst_cpu = target_cpu; + eenv.trg_cpu = target_cpu; if (tmp_backup < 0 || tmp_backup == prev_cpu || energy_diff(&eenv) >= 0) { @@ -8865,10 +8814,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); - /* - * We want to potentially raise target_cpu's OPP. - */ - update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -8890,11 +8835,6 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } - /* - * We want to potentially raise env.dst_cpu's OPP. - */ - update_capacity_of(env->dst_cpu); - raw_spin_unlock(&env->dst_rq->lock); } @@ -10454,11 +10394,6 @@ more_balance: * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - if (cur_ld_moved) - update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -10708,7 +10643,6 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; - long removed_util=0; if (cpu_isolated(this_cpu)) return 0; @@ -10735,17 +10669,6 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); - /* - * If removed_util_avg is !0 we most probably migrated some task away - * from this_cpu. In this case we might be willing to trigger an OPP - * update, but we want to do so if we don't find anybody else to pull - * here (we will trigger an OPP update with the pulled task's enqueue - * anyway). - * - * Record removed_util before calling update_blocked_averages, and use - * it below (before returning) to see if an OPP update is required. - */ - removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -10813,12 +10736,6 @@ out: if (pulled_task) { idle_exit_fair(this_rq); this_rq->idle_stamp = 0; - } else if (removed_util) { - /* - * No task pulled and someone has been migrated away. - * Good case to trigger an OPP update. - */ - update_capacity_of(this_cpu); } return pulled_task; @@ -10903,10 +10820,6 @@ static int active_load_balance_cpu_stop(void *data) p = detach_one_task(&env); if (p) { schedstat_inc(sd, alb_pushed); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - update_capacity_of(env.src_cpu); moved = true; } else { schedstat_inc(sd, alb_failed); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 47e97ef57eb8..af6a7f424d94 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -10,6 +10,8 @@ #include <linux/irq_work.h> #include <trace/events/sched.h> +#include "tune.h" + int sched_rr_timeslice = RR_TIMESLICE; static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); @@ -1394,6 +1396,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + schedtune_enqueue_task(p, cpu_of(rq)); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1405,6 +1409,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dec_hmp_sched_stats_rt(rq, p); dequeue_pushable_task(rq, p); + schedtune_dequeue_task(p, cpu_of(rq)); } /* @@ -1612,41 +1617,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag #endif } -#if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED) -static void sched_rt_update_capacity_req(struct rq *rq) -{ - u64 total, used, age_stamp, avg; - s64 delta; - - if (!sched_freq()) - return; - - sched_avg_update(rq); - /* - * Since we're reading these variables without serialization make sure - * we read them once before doing sanity checks on them. - */ - age_stamp = READ_ONCE(rq->age_stamp); - avg = READ_ONCE(rq->rt_avg); - delta = rq_clock(rq) - age_stamp; - - if (unlikely(delta < 0)) - delta = 0; - - total = sched_avg_period() + delta; - - used = div_u64(avg, total); - if (unlikely(used > SCHED_CAPACITY_SCALE)) - used = SCHED_CAPACITY_SCALE; - - set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used)); -} -#else -static inline void sched_rt_update_capacity_req(struct rq *rq) -{ } - -#endif - static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, struct rt_rq *rt_rq) { @@ -1715,17 +1685,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (prev->sched_class == &rt_sched_class) update_curr_rt(rq); - if (!rt_rq->rt_queued) { - /* - * The next task to be picked on this rq will have a lower - * priority than rt tasks so we can spend some time to update - * the capacity used by rt tasks based on the last activity. - * This value will be the used as an estimation of the next - * activity. - */ - sched_rt_update_capacity_req(rq); + if (!rt_rq->rt_queued) return NULL; - } put_prev_task(rq, prev); @@ -2558,9 +2519,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) update_curr_rt(rq); - if (rq->rt.rt_nr_running) - sched_rt_update_capacity_req(rq); - watchdog(rq, p); /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index df2fd16ca076..7426ae4dced3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2422,64 +2422,6 @@ static inline unsigned long cpu_util_freq(int cpu) #endif -#ifdef CONFIG_CPU_FREQ_GOV_SCHED -#define capacity_max SCHED_CAPACITY_SCALE -extern unsigned int capacity_margin; -extern struct static_key __sched_freq; - -static inline bool sched_freq(void) -{ - return static_key_false(&__sched_freq); -} - -/* - * sched_capacity_reqs expects capacity requests to be normalised. - * All capacities should sum to the range of 0-1024. - */ -DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); -void update_cpu_capacity_request(int cpu, bool request); - -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - - if (scr->cfs != capacity) { - scr->cfs = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; - update_cpu_capacity_request(cpu, request); - } -} -#else -#define sched_freq() false -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -#endif - #ifdef CONFIG_SCHED_HMP /* * HMP and EAS are orthogonal. Hopefully the compiler just elides out all code |