summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorSrinivasarao P <spathi@codeaurora.org>2017-12-18 14:14:52 +0530
committerSrinivasarao P <spathi@codeaurora.org>2017-12-18 14:23:14 +0530
commitbb6e8073112e0a7dd53e2cbd166b20f585a5bb79 (patch)
tree792232b5cb98372221f93dcb1d8182dd8b79119b /kernel/sched
parent02a73d4553b036e9dd44b02a7ca874dc0555f86b (diff)
parent46d256da87ef1132f02d234b885110d96c6889eb (diff)
Merge android-4.4.97 (46d256d) into msm-4.4
* refs/heads/tmp-46d256d Linux 4.4.97 staging: r8712u: Fix Sparse warning in rtl871x_xmit.c xen: don't print error message in case of missing Xenstore entry bt8xx: fix memory leak s390/dasd: check for device error pointer within state change interrupts mei: return error on notification request to a disconnected client exynos4-is: fimc-is: Unmap region obtained by of_iomap() staging: lustre: ptlrpc: skip lock if export failed staging: lustre: hsm: stack overrun in hai_dump_data_field staging: lustre: llite: don't invoke direct_IO for the EOF case platform/x86: intel_mid_thermal: Fix module autoload scsi: aacraid: Process Error for response I/O xen/manage: correct return value check on xenbus_scanf() cx231xx: Fix I2C on Internal Master 3 Bus perf tools: Only increase index if perf_evsel__new_idx() succeeds drm/amdgpu: when dpm disabled, also need to stop/start vce. i2c: riic: correctly finish transfers ext4: do not use stripe_width if it is not set ext4: fix stripe-unaligned allocations staging: rtl8712u: Fix endian settings for structs describing network packets mfd: axp20x: Fix axp288 PEK_DBR and PEK_DBF irqs being swapped mfd: ab8500-sysctrl: Handle probe deferral ARM: pxa: Don't rely on public mmc header to include leds.h mmc: s3cmci: include linux/interrupt.h for tasklet_struct PM / wakeirq: report a wakeup_event on dedicated wekup irq Fix tracing sample code warning. tracing/samples: Fix creation and deletion of simple_thread_fn creation drm/msm: fix an integer overflow test drm/msm: Fix potential buffer overflow issue perf tools: Fix build failure on perl script context ocfs2: fstrim: Fix start offset of first cluster group during fstrim ARM: 8715/1: add a private asm/unaligned.h ARM: dts: mvebu: pl310-cache disable double-linefill arm64: ensure __dump_instr() checks addr_limit ASoC: adau17x1: Workaround for noise bug in ADC KEYS: fix out-of-bounds read during ASN.1 parsing KEYS: return full count in keyring_read() if buffer is too small cifs: check MaxPathNameComponentLength != 0 before using it ALSA: seq: Fix nested rwsem annotation for lockdep splat ALSA: timer: Add missing mutex lock for compat ioctls BACKPORT: xfrm: Clear sk_dst_cache when applying per-socket policy. Revert "ANDROID: sched/rt: schedtune: Add boost retention to RT" cpufreq: Drop schedfreq governor ANDROID: sched/rt: schedtune: Add boost retention to RT ANDROID: sched/rt: add schedtune accounting ANDROID: Revert "arm64: move ELF_ET_DYN_BASE to 4GB / 4MB" ANDROID: Revert "arm: move ELF_ET_DYN_BASE to 4MB" sched: EAS: Fix the calculation of group util in group_idle_state() sched: EAS: update trg_cpu to backup_cpu if no energy saving for target_cpu sched: EAS: Fix the condition to distinguish energy before/after Conflicts: drivers/cpufreq/Kconfig drivers/gpu/drm/msm/msm_gem_submit.c kernel/sched/core.c kernel/sched/fair.c kernel/sched/rt.c kernel/sched/sched.h Change-Id: I0d8c5287cb67fd47c8944a002c0ca71adcdef537 Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/Makefile1
-rw-r--r--kernel/sched/core.c86
-rw-r--r--kernel/sched/cpufreq_sched.c525
-rw-r--r--kernel/sched/fair.c113
-rw-r--r--kernel/sched/rt.c54
-rw-r--r--kernel/sched/sched.h58
6 files changed, 19 insertions, 818 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index a353df46c8e4..64464b44a265 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -23,5 +23,4 @@ obj-$(CONFIG_SCHED_TUNE) += tune.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
-obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9dd9640bfe82..1d91c012b5d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3167,91 +3167,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
-#ifdef CONFIG_CPU_FREQ_GOV_SCHED
-
-static inline
-unsigned long add_capacity_margin(unsigned long cpu_capacity)
-{
- cpu_capacity = cpu_capacity * capacity_margin;
- cpu_capacity /= SCHED_CAPACITY_SCALE;
- return cpu_capacity;
-}
-
-static inline
-unsigned long sum_capacity_reqs(unsigned long cfs_cap,
- struct sched_capacity_reqs *scr)
-{
- unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
- return total += scr->dl;
-}
-
-unsigned long boosted_cpu_util(int cpu);
-static void sched_freq_tick_pelt(int cpu)
-{
- unsigned long cpu_utilization = boosted_cpu_util(cpu);
- unsigned long capacity_curr = capacity_curr_of(cpu);
- struct sched_capacity_reqs *scr;
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
- if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
- return;
-
- /*
- * To make free room for a task that is building up its "real"
- * utilization and to harm its performance the least, request
- * a jump to a higher OPP as soon as the margin of free capacity
- * is impacted (specified by capacity_margin).
- * Remember CPU utilization in sched_capacity_reqs should be normalised.
- */
- cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
- set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-}
-
-#ifdef CONFIG_SCHED_WALT
-static void sched_freq_tick_walt(int cpu)
-{
- unsigned long cpu_utilization = cpu_util_freq(cpu);
- unsigned long capacity_curr = capacity_curr_of(cpu);
-
- if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
- return sched_freq_tick_pelt(cpu);
-
- /*
- * Add a margin to the WALT utilization to check if we will need to
- * increase frequency.
- * NOTE: WALT tracks a single CPU signal for all the scheduling
- * classes, thus this margin is going to be added to the DL class as
- * well, which is something we do not do in sched_freq_tick_pelt case.
- */
- if (add_capacity_margin(cpu_utilization) <= capacity_curr)
- return;
-
- /*
- * It is likely that the load is growing so we
- * keep the added margin in our request as an
- * extra boost.
- * Remember CPU utilization in sched_capacity_reqs should be normalised.
- */
- cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
- set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-
-}
-#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
-#else
-#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
-#endif /* CONFIG_SCHED_WALT */
-
-static void sched_freq_tick(int cpu)
-{
- if (!sched_freq())
- return;
-
- _sched_freq_tick(cpu);
-}
-#else
-static inline void sched_freq_tick(int cpu) { }
-#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
-
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -3278,7 +3193,6 @@ void scheduler_tick(void)
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
early_notif = early_detection_notify(rq, wallclock);
- sched_freq_tick(cpu);
raw_spin_unlock(&rq->lock);
if (early_notif)
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
deleted file mode 100644
index ec0aed7a8f96..000000000000
--- a/kernel/sched/cpufreq_sched.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufreq.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/percpu.h>
-#include <linux/irq_work.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/cpufreq_sched.h>
-
-#include "sched.h"
-
-#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */
-#define THROTTLE_UP_NSEC 500000 /* 500us default */
-
-struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
-static bool __read_mostly cpufreq_driver_slow;
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
-static struct cpufreq_governor cpufreq_gov_sched;
-#endif
-
-static DEFINE_PER_CPU(unsigned long, enabled);
-DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
-
-struct gov_tunables {
- struct gov_attr_set attr_set;
- unsigned int up_throttle_nsec;
- unsigned int down_throttle_nsec;
-};
-
-/**
- * gov_data - per-policy data internal to the governor
- * @up_throttle: next throttling period expiry if increasing OPP
- * @down_throttle: next throttling period expiry if decreasing OPP
- * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
- * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
- * @task: worker thread for dvfs transition that may block/sleep
- * @irq_work: callback used to wake up worker thread
- * @requested_freq: last frequency requested by the sched governor
- *
- * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
- * per-policy instance of it is created when the cpufreq_sched governor receives
- * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
- * member of struct cpufreq_policy.
- *
- * Readers of this data must call down_read(policy->rwsem). Writers must
- * call down_write(policy->rwsem).
- */
-struct gov_data {
- ktime_t up_throttle;
- ktime_t down_throttle;
- struct gov_tunables *tunables;
- struct list_head tunables_hook;
- struct task_struct *task;
- struct irq_work irq_work;
- unsigned int requested_freq;
-};
-
-static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
- unsigned int freq)
-{
- struct gov_data *gd = policy->governor_data;
-
- /* avoid race with cpufreq_sched_stop */
- if (!down_write_trylock(&policy->rwsem))
- return;
-
- __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
-
- gd->up_throttle = ktime_add_ns(ktime_get(),
- gd->tunables->up_throttle_nsec);
- gd->down_throttle = ktime_add_ns(ktime_get(),
- gd->tunables->down_throttle_nsec);
- up_write(&policy->rwsem);
-}
-
-static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
-{
- ktime_t now = ktime_get();
-
- ktime_t throttle = gd->requested_freq < cur_freq ?
- gd->down_throttle : gd->up_throttle;
-
- if (ktime_after(now, throttle))
- return false;
-
- while (1) {
- int usec_left = ktime_to_ns(ktime_sub(throttle, now));
-
- usec_left /= NSEC_PER_USEC;
- trace_cpufreq_sched_throttled(usec_left);
- usleep_range(usec_left, usec_left + 100);
- now = ktime_get();
- if (ktime_after(now, throttle))
- return true;
- }
-}
-
-/*
- * we pass in struct cpufreq_policy. This is safe because changing out the
- * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
- * which tears down all of the data structures and __cpufreq_governor(policy,
- * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
- * new policy pointer
- */
-static int cpufreq_sched_thread(void *data)
-{
- struct sched_param param;
- struct cpufreq_policy *policy;
- struct gov_data *gd;
- unsigned int new_request = 0;
- unsigned int last_request = 0;
- int ret;
-
- policy = (struct cpufreq_policy *) data;
- gd = policy->governor_data;
-
- param.sched_priority = 50;
- ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
- if (ret) {
- pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
- do_exit(-EINVAL);
- } else {
- pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
- __func__, gd->task->pid);
- }
-
- do {
- new_request = gd->requested_freq;
- if (new_request == last_request) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop())
- break;
- schedule();
- } else {
- /*
- * if the frequency thread sleeps while waiting to be
- * unthrottled, start over to check for a newer request
- */
- if (finish_last_request(gd, policy->cur))
- continue;
- last_request = new_request;
- cpufreq_sched_try_driver_target(policy, new_request);
- }
- } while (!kthread_should_stop());
-
- return 0;
-}
-
-static void cpufreq_sched_irq_work(struct irq_work *irq_work)
-{
- struct gov_data *gd;
-
- gd = container_of(irq_work, struct gov_data, irq_work);
- if (!gd)
- return;
-
- wake_up_process(gd->task);
-}
-
-static void update_fdomain_capacity_request(int cpu)
-{
- unsigned int freq_new, index_new, cpu_tmp;
- struct cpufreq_policy *policy;
- struct gov_data *gd;
- unsigned long capacity = 0;
-
- /*
- * Avoid grabbing the policy if possible. A test is still
- * required after locking the CPU's policy to avoid racing
- * with the governor changing.
- */
- if (!per_cpu(enabled, cpu))
- return;
-
- policy = cpufreq_cpu_get(cpu);
- if (IS_ERR_OR_NULL(policy))
- return;
-
- if (policy->governor != &cpufreq_gov_sched ||
- !policy->governor_data)
- goto out;
-
- gd = policy->governor_data;
-
- /* find max capacity requested by cpus in this policy */
- for_each_cpu(cpu_tmp, policy->cpus) {
- struct sched_capacity_reqs *scr;
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
- capacity = max(capacity, scr->total);
- }
-
- /* Convert the new maximum capacity request into a cpu frequency */
- freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT;
- if (cpufreq_frequency_table_target(policy, policy->freq_table,
- freq_new, CPUFREQ_RELATION_L,
- &index_new))
- goto out;
- freq_new = policy->freq_table[index_new].frequency;
-
- if (freq_new > policy->max)
- freq_new = policy->max;
-
- if (freq_new < policy->min)
- freq_new = policy->min;
-
- trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
- gd->requested_freq);
- if (freq_new == gd->requested_freq)
- goto out;
-
- gd->requested_freq = freq_new;
-
- /*
- * Throttling is not yet supported on platforms with fast cpufreq
- * drivers.
- */
- if (cpufreq_driver_slow)
- irq_work_queue_on(&gd->irq_work, cpu);
- else
- cpufreq_sched_try_driver_target(policy, freq_new);
-
-out:
- cpufreq_cpu_put(policy);
-}
-
-#ifdef CONFIG_SCHED_WALT
-static inline unsigned long
-requested_capacity(struct sched_capacity_reqs *scr)
-{
- if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
- return scr->cfs;
- return scr->cfs + scr->rt;
-}
-#else
-#define requested_capacity(scr) (scr->cfs + scr->rt)
-#endif
-
-void update_cpu_capacity_request(int cpu, bool request)
-{
- unsigned long new_capacity;
- struct sched_capacity_reqs *scr;
-
- /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
- lockdep_assert_held(&cpu_rq(cpu)->lock);
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-
- new_capacity = requested_capacity(scr);
- new_capacity = new_capacity * capacity_margin
- / SCHED_CAPACITY_SCALE;
- new_capacity += scr->dl;
-
- if (new_capacity == scr->total)
- return;
-
- trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
-
- scr->total = new_capacity;
- if (request)
- update_fdomain_capacity_request(cpu);
-}
-
-static inline void set_sched_freq(void)
-{
- static_key_slow_inc(&__sched_freq);
-}
-
-static inline void clear_sched_freq(void)
-{
- static_key_slow_dec(&__sched_freq);
-}
-
-/* Tunables */
-static struct gov_tunables *global_tunables;
-
-static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set)
-{
- return container_of(attr_set, struct gov_tunables, attr_set);
-}
-
-static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
-
- return sprintf(buf, "%u\n", tunables->up_throttle_nsec);
-}
-
-static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set,
- const char *buf, size_t count)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
- int ret;
- long unsigned int val;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret < 0)
- return ret;
- tunables->up_throttle_nsec = val;
- return count;
-}
-
-static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
-
- return sprintf(buf, "%u\n", tunables->down_throttle_nsec);
-}
-
-static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set,
- const char *buf, size_t count)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
- int ret;
- long unsigned int val;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret < 0)
- return ret;
- tunables->down_throttle_nsec = val;
- return count;
-}
-
-static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec);
-static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec);
-
-static struct attribute *schedfreq_attributes[] = {
- &up_throttle_nsec.attr,
- &down_throttle_nsec.attr,
- NULL
-};
-
-static struct kobj_type tunables_ktype = {
- .default_attrs = schedfreq_attributes,
- .sysfs_ops = &governor_sysfs_ops,
-};
-
-static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
-{
- struct gov_data *gd;
- int cpu;
- int rc;
-
- for_each_cpu(cpu, policy->cpus)
- memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
- sizeof(struct sched_capacity_reqs));
-
- gd = kzalloc(sizeof(*gd), GFP_KERNEL);
- if (!gd)
- return -ENOMEM;
-
- policy->governor_data = gd;
-
- if (!global_tunables) {
- gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL);
- if (!gd->tunables)
- goto free_gd;
-
- gd->tunables->up_throttle_nsec =
- policy->cpuinfo.transition_latency ?
- policy->cpuinfo.transition_latency :
- THROTTLE_UP_NSEC;
- gd->tunables->down_throttle_nsec =
- THROTTLE_DOWN_NSEC;
-
- rc = kobject_init_and_add(&gd->tunables->attr_set.kobj,
- &tunables_ktype,
- get_governor_parent_kobj(policy),
- "%s", cpufreq_gov_sched.name);
- if (rc)
- goto free_tunables;
-
- gov_attr_set_init(&gd->tunables->attr_set,
- &gd->tunables_hook);
-
- pr_debug("%s: throttle_threshold = %u [ns]\n",
- __func__, gd->tunables->up_throttle_nsec);
-
- if (!have_governor_per_policy())
- global_tunables = gd->tunables;
- } else {
- gd->tunables = global_tunables;
- gov_attr_set_get(&global_tunables->attr_set,
- &gd->tunables_hook);
- }
-
- policy->governor_data = gd;
- if (cpufreq_driver_is_slow()) {
- cpufreq_driver_slow = true;
- gd->task = kthread_create(cpufreq_sched_thread, policy,
- "kschedfreq:%d",
- cpumask_first(policy->related_cpus));
- if (IS_ERR_OR_NULL(gd->task)) {
- pr_err("%s: failed to create kschedfreq thread\n",
- __func__);
- goto free_tunables;
- }
- get_task_struct(gd->task);
- kthread_bind_mask(gd->task, policy->related_cpus);
- wake_up_process(gd->task);
- init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
- }
-
- set_sched_freq();
-
- return 0;
-
-free_tunables:
- kfree(gd->tunables);
-free_gd:
- policy->governor_data = NULL;
- kfree(gd);
- return -ENOMEM;
-}
-
-static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
-{
- unsigned int count;
- struct gov_data *gd = policy->governor_data;
-
- clear_sched_freq();
- if (cpufreq_driver_slow) {
- kthread_stop(gd->task);
- put_task_struct(gd->task);
- }
-
- count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook);
- if (!count) {
- if (!have_governor_per_policy())
- global_tunables = NULL;
- kfree(gd->tunables);
- }
-
- policy->governor_data = NULL;
-
- kfree(gd);
- return 0;
-}
-
-static int cpufreq_sched_start(struct cpufreq_policy *policy)
-{
- int cpu;
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(enabled, cpu) = 1;
-
- return 0;
-}
-
-static void cpufreq_sched_limits(struct cpufreq_policy *policy)
-{
- unsigned int clamp_freq;
- struct gov_data *gd = policy->governor_data;;
-
- pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
- policy->cpu, policy->min, policy->max,
- policy->cur);
-
- clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
-
- if (policy->cur != clamp_freq)
- __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
-}
-
-static int cpufreq_sched_stop(struct cpufreq_policy *policy)
-{
- int cpu;
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(enabled, cpu) = 0;
-
- return 0;
-}
-
-static int cpufreq_sched_setup(struct cpufreq_policy *policy,
- unsigned int event)
-{
- switch (event) {
- case CPUFREQ_GOV_POLICY_INIT:
- return cpufreq_sched_policy_init(policy);
- case CPUFREQ_GOV_POLICY_EXIT:
- return cpufreq_sched_policy_exit(policy);
- case CPUFREQ_GOV_START:
- return cpufreq_sched_start(policy);
- case CPUFREQ_GOV_STOP:
- return cpufreq_sched_stop(policy);
- case CPUFREQ_GOV_LIMITS:
- cpufreq_sched_limits(policy);
- break;
- }
- return 0;
-}
-
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
-static
-#endif
-struct cpufreq_governor cpufreq_gov_sched = {
- .name = "sched",
- .governor = cpufreq_sched_setup,
- .owner = THIS_MODULE,
-};
-
-static int __init cpufreq_sched_init(void)
-{
- int cpu;
-
- for_each_cpu(cpu, cpu_possible_mask)
- per_cpu(enabled, cpu) = 0;
- return cpufreq_register_governor(&cpufreq_gov_sched);
-}
-
-/* Try to make this the default governor */
-fs_initcall(cpufreq_sched_init);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3a8411bac2b..e515311aa93c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -53,7 +53,6 @@ unsigned int sysctl_sched_latency = 6000000ULL;
unsigned int normalized_sysctl_sched_latency = 6000000ULL;
unsigned int sysctl_sched_sync_hint_enable = 1;
-unsigned int sysctl_sched_initial_task_util = 0;
unsigned int sysctl_sched_cstate_aware = 1;
/*
@@ -746,9 +745,7 @@ void init_entity_runnable_average(struct sched_entity *se)
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
/*
* In previous Android versions, we used to have:
- * sa->util_avg = sched_freq() ?
- * sysctl_sched_initial_task_util :
- * scale_load_down(SCHED_LOAD_SCALE);
+ * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
* sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
* However, that functionality has been moved to enqueue.
* It is unclear if we should restore this in enqueue.
@@ -5759,23 +5756,6 @@ unsigned long boosted_cpu_util(int cpu);
#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
#endif
-#if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED)
-static void update_capacity_of(int cpu)
-{
- unsigned long req_cap;
-
- if (!sched_freq())
- return;
-
- /* Normalize scale-invariant capacity to cpu. */
- req_cap = boosted_cpu_util(cpu);
- req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
- set_cfs_cpu_capacity(cpu, true, req_cap);
-}
-#else
-#define update_capacity_of(X) do {} while(0)
-#endif /* SMP and CPU_FREQ_GOV_SCHED */
-
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -5788,7 +5768,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
#ifdef CONFIG_SMP
int task_new = flags & ENQUEUE_WAKEUP_NEW;
- int task_wakeup = flags & ENQUEUE_WAKEUP;
#endif
/*
@@ -5863,19 +5842,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
rq->rd->overutilized = true;
trace_sched_overutilized(true);
}
-
- }
-
- if (!se) {
- /*
- * We want to potentially trigger a freq switch
- * request only for tasks that are waking up; this is
- * because we get here also during load balancing, but
- * in these cases it seems wise to trigger as single
- * request after load balancing is done.
- */
- if (task_new || task_wakeup)
- update_capacity_of(cpu_of(rq));
}
#endif /* CONFIG_SMP */
@@ -5953,23 +5919,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
schedtune_dequeue_task(p, cpu_of(rq));
- if (!se) {
- /*
- * We want to potentially trigger a freq switch
- * request only for tasks that are going to sleep;
- * this is because we get here also during load
- * balancing, but in these cases it seems wise to
- * trigger as single request after load balancing is
- * done.
- */
- if (task_sleep) {
- if (rq->cfs.nr_running)
- update_capacity_of(cpu_of(rq));
- else if (sched_freq())
- set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */
- }
- }
-
#endif /* CONFIG_SMP */
hrtick_update(rq);
@@ -6521,13 +6470,6 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
/* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
state++;
- /*
- * Try to estimate if a deeper idle state is
- * achievable when we move the task.
- */
- for_each_cpu(i, sched_group_cpus(sg))
- grp_util += cpu_util(i);
-
src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg));
dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg));
if (src_in_grp == dst_in_grp) {
@@ -6536,10 +6478,16 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
*/
goto end;
}
- /* add or remove util as appropriate to indicate what group util
- * will be (worst case - no concurrent execution) after moving the task
+
+ /*
+ * Try to estimate if a deeper idle state is
+ * achievable when we move the task.
*/
- grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta;
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ grp_util += cpu_util_wake(i, eenv->task);
+ if (unlikely(i == eenv->trg_cpu))
+ grp_util += eenv->util_delta;
+ }
if (grp_util <=
((long)sg->sgc->max_capacity * (int)sg->group_weight)) {
@@ -6626,13 +6574,13 @@ static int sched_group_energy(struct energy_env *eenv)
if (sg->group_weight == 1) {
/* Remove capacity of src CPU (before task move) */
- if (eenv->util_delta == 0 &&
+ if (eenv->trg_cpu == eenv->src_cpu &&
cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
eenv->cap.delta -= eenv->cap.before;
}
/* Add capacity of dst CPU (after task move) */
- if (eenv->util_delta != 0 &&
+ if (eenv->trg_cpu == eenv->dst_cpu &&
cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
eenv->cap.delta += eenv->cap.after;
@@ -7813,6 +7761,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
/* No energy saving for target_cpu, try backup */
target_cpu = tmp_backup;
eenv.dst_cpu = target_cpu;
+ eenv.trg_cpu = target_cpu;
if (tmp_backup < 0 ||
tmp_backup == prev_cpu ||
energy_diff(&eenv) >= 0) {
@@ -8865,10 +8814,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p)
{
raw_spin_lock(&rq->lock);
attach_task(rq, p);
- /*
- * We want to potentially raise target_cpu's OPP.
- */
- update_capacity_of(cpu_of(rq));
raw_spin_unlock(&rq->lock);
}
@@ -8890,11 +8835,6 @@ static void attach_tasks(struct lb_env *env)
attach_task(env->dst_rq, p);
}
- /*
- * We want to potentially raise env.dst_cpu's OPP.
- */
- update_capacity_of(env->dst_cpu);
-
raw_spin_unlock(&env->dst_rq->lock);
}
@@ -10454,11 +10394,6 @@ more_balance:
* ld_moved - cumulative load moved across iterations
*/
cur_ld_moved = detach_tasks(&env);
- /*
- * We want to potentially lower env.src_cpu's OPP.
- */
- if (cur_ld_moved)
- update_capacity_of(env.src_cpu);
/*
* We've detached some tasks from busiest_rq. Every
@@ -10708,7 +10643,6 @@ static int idle_balance(struct rq *this_rq)
struct sched_domain *sd;
int pulled_task = 0;
u64 curr_cost = 0;
- long removed_util=0;
if (cpu_isolated(this_cpu))
return 0;
@@ -10735,17 +10669,6 @@ static int idle_balance(struct rq *this_rq)
raw_spin_unlock(&this_rq->lock);
- /*
- * If removed_util_avg is !0 we most probably migrated some task away
- * from this_cpu. In this case we might be willing to trigger an OPP
- * update, but we want to do so if we don't find anybody else to pull
- * here (we will trigger an OPP update with the pulled task's enqueue
- * anyway).
- *
- * Record removed_util before calling update_blocked_averages, and use
- * it below (before returning) to see if an OPP update is required.
- */
- removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg);
update_blocked_averages(this_cpu);
rcu_read_lock();
for_each_domain(this_cpu, sd) {
@@ -10813,12 +10736,6 @@ out:
if (pulled_task) {
idle_exit_fair(this_rq);
this_rq->idle_stamp = 0;
- } else if (removed_util) {
- /*
- * No task pulled and someone has been migrated away.
- * Good case to trigger an OPP update.
- */
- update_capacity_of(this_cpu);
}
return pulled_task;
@@ -10903,10 +10820,6 @@ static int active_load_balance_cpu_stop(void *data)
p = detach_one_task(&env);
if (p) {
schedstat_inc(sd, alb_pushed);
- /*
- * We want to potentially lower env.src_cpu's OPP.
- */
- update_capacity_of(env.src_cpu);
moved = true;
} else {
schedstat_inc(sd, alb_failed);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 47e97ef57eb8..af6a7f424d94 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -10,6 +10,8 @@
#include <linux/irq_work.h>
#include <trace/events/sched.h>
+#include "tune.h"
+
int sched_rr_timeslice = RR_TIMESLICE;
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -1394,6 +1396,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
+
+ schedtune_enqueue_task(p, cpu_of(rq));
}
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -1405,6 +1409,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
dec_hmp_sched_stats_rt(rq, p);
dequeue_pushable_task(rq, p);
+ schedtune_dequeue_task(p, cpu_of(rq));
}
/*
@@ -1612,41 +1617,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
#endif
}
-#if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED)
-static void sched_rt_update_capacity_req(struct rq *rq)
-{
- u64 total, used, age_stamp, avg;
- s64 delta;
-
- if (!sched_freq())
- return;
-
- sched_avg_update(rq);
- /*
- * Since we're reading these variables without serialization make sure
- * we read them once before doing sanity checks on them.
- */
- age_stamp = READ_ONCE(rq->age_stamp);
- avg = READ_ONCE(rq->rt_avg);
- delta = rq_clock(rq) - age_stamp;
-
- if (unlikely(delta < 0))
- delta = 0;
-
- total = sched_avg_period() + delta;
-
- used = div_u64(avg, total);
- if (unlikely(used > SCHED_CAPACITY_SCALE))
- used = SCHED_CAPACITY_SCALE;
-
- set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used));
-}
-#else
-static inline void sched_rt_update_capacity_req(struct rq *rq)
-{ }
-
-#endif
-
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
struct rt_rq *rt_rq)
{
@@ -1715,17 +1685,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
if (prev->sched_class == &rt_sched_class)
update_curr_rt(rq);
- if (!rt_rq->rt_queued) {
- /*
- * The next task to be picked on this rq will have a lower
- * priority than rt tasks so we can spend some time to update
- * the capacity used by rt tasks based on the last activity.
- * This value will be the used as an estimation of the next
- * activity.
- */
- sched_rt_update_capacity_req(rq);
+ if (!rt_rq->rt_queued)
return NULL;
- }
put_prev_task(rq, prev);
@@ -2558,9 +2519,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
update_curr_rt(rq);
- if (rq->rt.rt_nr_running)
- sched_rt_update_capacity_req(rq);
-
watchdog(rq, p);
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df2fd16ca076..7426ae4dced3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2422,64 +2422,6 @@ static inline unsigned long cpu_util_freq(int cpu)
#endif
-#ifdef CONFIG_CPU_FREQ_GOV_SCHED
-#define capacity_max SCHED_CAPACITY_SCALE
-extern unsigned int capacity_margin;
-extern struct static_key __sched_freq;
-
-static inline bool sched_freq(void)
-{
- return static_key_false(&__sched_freq);
-}
-
-/*
- * sched_capacity_reqs expects capacity requests to be normalised.
- * All capacities should sum to the range of 0-1024.
- */
-DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
-void update_cpu_capacity_request(int cpu, bool request);
-
-static inline void set_cfs_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-
- if (scr->cfs != capacity) {
- scr->cfs = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-
-static inline void set_rt_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) {
- per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-
-static inline void set_dl_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) {
- per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-#else
-#define sched_freq() false
-static inline void set_cfs_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-static inline void set_rt_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-static inline void set_dl_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-#endif
-
#ifdef CONFIG_SCHED_HMP
/*
* HMP and EAS are orthogonal. Hopefully the compiler just elides out all code