From 1ed33cf95476666656d510fab15d87ee04e2327d Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Mon, 28 Aug 2017 09:56:27 -0700
Subject: UPSTREAM: cpufreq: schedutil: Make iowait boost more energy efficient

Currently the iowait_boost feature in schedutil makes the frequency go to
max on iowait wakeups. This feature was added to handle a case that Peter
described, where the throughput of operations involving continuous I/O
requests [1] is reduced by running at a lower frequency; the lower
throughput itself keeps utilization low, which in turn keeps the frequency
low, so the system is "stuck".

Instead of going straight to max, it's also possible to achieve the same
effect by ramping up to max when repeated in_iowait wakeups happen. This
patch does that. We start from a lower frequency (policy->min) and double
the boost for every consecutive iowait update until we reach the maximum
iowait boost frequency (iowait_boost_max).

I ran a synthetic test (continuous O_DIRECT writes in a loop) on an x86
machine with intel_pstate in passive mode using schedutil. In this test
the iowait_boost value ramped from 800MHz to 4GHz in 60ms. The patch
achieves the same improved throughput as the existing behavior.

[1] https://patchwork.kernel.org/patch/9735885/

Change-Id: I4a018434a50f4ca29ec15b03465f6dc212e54423
Cc: Srinivas Pandruvada
Cc: Len Brown
Cc: Rafael J. Wysocki
Cc: Viresh Kumar
Cc: Ingo Molnar
Cc: Peter Zijlstra
Suggested-by: Peter Zijlstra
Suggested-by: Viresh Kumar
Signed-off-by: Joel Fernandes
---
 kernel/sched/cpufreq_schedutil.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e12309c1b07b..f6b95ca15023 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -64,6 +64,7 @@ struct sugov_cpu {
 	struct update_util_data update_util;
 	struct sugov_policy *sg_policy;
 
+	bool iowait_boost_pending;
 	unsigned long iowait_boost;
 	unsigned long iowait_boost_max;
 	u64 last_update;
@@ -224,30 +225,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 			unsigned int flags)
 {
 	if (flags & SCHED_CPUFREQ_IOWAIT) {
-		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		if (sg_cpu->iowait_boost_pending)
+			return;
+
+		sg_cpu->iowait_boost_pending = true;
+
+		if (sg_cpu->iowait_boost) {
+			sg_cpu->iowait_boost <<= 1;
+			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
+				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		} else {
+			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+		}
 	} else if (sg_cpu->iowait_boost) {
 		s64 delta_ns = time - sg_cpu->last_update;
 
 		/* Clear iowait_boost if the CPU apprears to have been idle. */
-		if (delta_ns > TICK_NSEC)
+		if (delta_ns > TICK_NSEC) {
 			sg_cpu->iowait_boost = 0;
+			sg_cpu->iowait_boost_pending = false;
+		}
 	}
 }
 
 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 			unsigned long *max)
 {
-	unsigned long boost_util = sg_cpu->iowait_boost;
-	unsigned long boost_max = sg_cpu->iowait_boost_max;
+	unsigned long boost_util, boost_max;
 
-	if (!boost_util)
+	if (!sg_cpu->iowait_boost)
 		return;
 
+	if (sg_cpu->iowait_boost_pending) {
+		sg_cpu->iowait_boost_pending = false;
+	} else {
+		sg_cpu->iowait_boost >>= 1;
+		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+			sg_cpu->iowait_boost = 0;
+			return;
+		}
+	}
+
+	boost_util = sg_cpu->iowait_boost;
+	boost_max = sg_cpu->iowait_boost_max;
+
 	if (*util * boost_max < *max * boost_util) {
 		*util = boost_util;
 		*max = boost_max;
 	}
-	sg_cpu->iowait_boost >>= 1;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -320,6 +345,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
+			j_sg_cpu->iowait_boost_pending = false;
 			continue;
 		}
 		if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
--
cgit v1.2.3

From 7842de4545c71363ef599173b66ab5e1933c43e1 Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Sun, 23 Jul 2017 08:54:26 -0700
Subject: UPSTREAM: cpufreq: schedutil: Use unsigned int for iowait boost

Make iowait_boost and iowait_boost_max unsigned int, since their unit is
kHz, which is consistent with struct cpufreq_policy. Also change the
local variables in sugov_iowait_boost() to match.

Change-Id: I6c67ed94c57c4bdb24bada4b97045593fcb95d2e
Cc: Srinivas Pandruvada
Cc: Len Brown
Cc: Rafael J. Wysocki
Cc: Viresh Kumar
Cc: Ingo Molnar
Cc: Peter Zijlstra
Signed-off-by: Joel Fernandes
---
 kernel/sched/cpufreq_schedutil.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index f6b95ca15023..a0bcf488fb81 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -65,8 +65,8 @@ struct sugov_cpu {
 	struct sugov_policy *sg_policy;
 
 	bool iowait_boost_pending;
-	unsigned long iowait_boost;
-	unsigned long iowait_boost_max;
+	unsigned int iowait_boost;
+	unsigned int iowait_boost_max;
 	u64 last_update;
 
 	/* The fields below are only needed when sharing a policy. */
@@ -251,7 +251,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 			unsigned long *max)
 {
-	unsigned long boost_util, boost_max;
+	unsigned int boost_util, boost_max;
 
 	if (!sg_cpu->iowait_boost)
 		return;
--
cgit v1.2.3
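The ramp-and-decay policy the two patches above implement can be seen in
isolation with a small stand-alone C sketch (user-space, not kernel code;
the 800 MHz min / 4 GHz max values mirror the test setup described in the
first commit message and are otherwise assumptions):

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the sugov_cpu fields used by the patch; values in kHz. */
    static unsigned int iowait_boost;
    static bool iowait_boost_pending;
    static const unsigned int policy_min = 800000, iowait_boost_max = 4000000;

    /* On an iowait wakeup: start at policy min, then double per wakeup. */
    static void set_iowait_boost(void)
    {
        if (iowait_boost_pending)
            return;
        iowait_boost_pending = true;
        if (iowait_boost) {
            iowait_boost <<= 1;
            if (iowait_boost > iowait_boost_max)
                iowait_boost = iowait_boost_max;
        } else {
            iowait_boost = policy_min;
        }
    }

    /* On a utilization update: consume the pending boost, else halve it. */
    static void decay_iowait_boost(void)
    {
        if (!iowait_boost)
            return;
        if (iowait_boost_pending)
            iowait_boost_pending = false;
        else if ((iowait_boost >>= 1) < policy_min)
            iowait_boost = 0;
    }

    int main(void)
    {
        for (int i = 0; i < 5; i++) {   /* five consecutive iowait wakeups */
            set_iowait_boost();
            decay_iowait_boost();
            printf("boost = %u kHz\n", iowait_boost);
        }
        return 0;   /* prints 800000, 1600000, 3200000, 4000000, 4000000 */
    }

With consecutive wakeups the boost doubles each update until it caps at
iowait_boost_max; once the wakeups stop, it halves each update and drops
to zero below policy min, which is the energy saving over jumping straight
to max.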
From 26b37261ea25c6928c6a69e77a8f7d39ee3267c9 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 26 May 2017 11:11:47 -0700
Subject: sched: deadline: WALT: account cumulative runnable avg

Account the cumulative runnable average for WALT CPU utilization
accounting.

Change-Id: I56934894e626dec183740eeaf89a57d2ef638143
Signed-off-by: Joonwoo Park
---
 kernel/sched/deadline.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9ec7f19b5020..2aae8b8b68e8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,8 @@
 #include
 
+#include "walt.h"
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -882,6 +884,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	WARN_ON(!dl_prio(prio));
 	dl_rq->dl_nr_running++;
 	add_nr_running(rq_of_dl_rq(dl_rq), 1);
+	walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
 
 	inc_dl_deadline(dl_rq, deadline);
 	inc_dl_migration(dl_se, dl_rq);
@@ -896,6 +899,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	WARN_ON(!dl_rq->dl_nr_running);
 	dl_rq->dl_nr_running--;
 	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
+	walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
 
 	dec_dl_deadline(dl_rq, dl_se->deadline);
 	dec_dl_migration(dl_se, dl_rq);
--
cgit v1.2.3

From 48f67ea85de468a9b3e47e723e7681cf7771dea6 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 26 May 2017 11:19:36 -0700
Subject: sched: WALT: fix broken cumulative runnable average accounting

When a running task's ravg.demand changes, update_history() adjusts
rq->cumulative_runnable_avg to reflect the change in CPU load. Currently
this fixup is broken: it accumulates the task's new demand without
subtracting the task's old demand. Fix the fixup logic to subtract the
task's old demand.

Change-Id: I61beb32a4850879ccb39b733f5564251e465bfeb
Signed-off-by: Joonwoo Park
---
 kernel/sched/walt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 92c3aae8e056..166641ed1f39 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -111,8 +111,10 @@ walt_dec_cumulative_runnable_avg(struct rq *rq,
 
 static void
 fixup_cumulative_runnable_avg(struct rq *rq,
-			struct task_struct *p, s64 task_load_delta)
+			struct task_struct *p, u64 new_task_load)
 {
+	s64 task_load_delta = (s64)new_task_load - task_load(p);
+
 	rq->cumulative_runnable_avg += task_load_delta;
 	if ((s64)rq->cumulative_runnable_avg < 0)
 		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
--
cgit v1.2.3
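The arithmetic of the fixup fix above, reduced to a stand-alone sketch
(the demand numbers are invented for illustration):

    #include <stdio.h>

    static long long cumulative_runnable_avg;   /* the rq-wide sum */

    /* Fixed fixup: fold in (new demand - old demand), not just new demand. */
    static void fixup_cumulative_runnable_avg(unsigned int old_demand,
                                              unsigned int new_demand)
    {
        cumulative_runnable_avg += (long long)new_demand - old_demand;
    }

    int main(void)
    {
        cumulative_runnable_avg = 300;           /* task enqueued, demand 300 */
        fixup_cumulative_runnable_avg(300, 500); /* demand re-estimated to 500 */
        /* prints 500; the broken version added the full 500 and got 800 */
        printf("%lld\n", cumulative_runnable_avg);
        return 0;
    }

The broken variant kept adding the new demand on top of the old one, so
the rq sum drifted upward every time a task's demand was re-estimated.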
From ee4cebd75ed7b77132c39c0093923f9ff1bcafaa Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Thu, 8 Dec 2016 16:12:12 -0800
Subject: sched: EAS/WALT: use cr_avg instead of prev_runnable_sum

WALT accounts two major statistics: CPU load and cumulative task demand.
CPU load, which accumulates each CPU's absolute execution time, is for
CPU frequency guidance. Cumulative task demand, which reflects each CPU's
instantaneous load at a given time, is for task placement decisions.

Use cumulative task demand in cpu_util() for task placement, and
introduce cpu_util_freq() for frequency guidance.

Change-Id: Id928f01dbc8cb2a617cdadc584c1f658022565c5
Signed-off-by: Joonwoo Park
---
 kernel/sched/core.c  |  2 +-
 kernel/sched/fair.c  |  4 ++--
 kernel/sched/sched.h | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9307827cc7b1..4f97de8e0b18 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2992,7 +2992,7 @@ static void sched_freq_tick_pelt(int cpu)
 #ifdef CONFIG_SCHED_WALT
 static void sched_freq_tick_walt(int cpu)
 {
-	unsigned long cpu_utilization = cpu_util(cpu);
+	unsigned long cpu_utilization = cpu_util_freq(cpu);
 	unsigned long capacity_curr = capacity_curr_of(cpu);
 
 	if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c193e9b1c38f..3641dad3d4cc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4659,7 +4659,7 @@ static inline void hrtick_update(struct rq *rq)
 static bool cpu_overutilized(int cpu);
 unsigned long boosted_cpu_util(int cpu);
 #else
-#define boosted_cpu_util(cpu) cpu_util(cpu)
+#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
 #endif
 
 #ifdef CONFIG_SMP
@@ -5937,7 +5937,7 @@ schedtune_task_margin(struct task_struct *task)
 unsigned long
 boosted_cpu_util(int cpu)
 {
-	unsigned long util = cpu_util(cpu);
+	unsigned long util = cpu_util_freq(cpu);
 	long margin = schedtune_cpu_margin(util, cpu);
 
 	trace_sched_boost_cpu(cpu, util, margin);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 029cf2bbeda2..73077f535e95 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1592,7 +1592,7 @@ static inline unsigned long __cpu_util(int cpu, int delta)
 #ifdef CONFIG_SCHED_WALT
 	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
+		util = cpu_rq(cpu)->cumulative_runnable_avg << SCHED_LOAD_SHIFT;
 		do_div(util, walt_ravg_window);
 	}
 #endif
@@ -1608,6 +1608,20 @@ static inline unsigned long cpu_util(int cpu)
 	return __cpu_util(cpu, 0);
 }
 
+static inline unsigned long cpu_util_freq(int cpu)
+{
+	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+	unsigned long capacity = capacity_orig_of(cpu);
+
+#ifdef CONFIG_SCHED_WALT
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
+		do_div(util, walt_ravg_window);
+	}
+#endif
+	return (util >= capacity) ? capacity : util;
+}
+
 #endif
 
 #ifdef CONFIG_CPU_FREQ_GOV_SCHED
--
cgit v1.2.3
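Both helpers above convert a WALT window sum into capacity units the same
way: scale the accumulated time by SCHED_LOAD_SCALE and divide by the
window length. A sketch with assumed numbers (a 20 ms window and
SCHED_LOAD_SHIFT of 10; both are illustrative, not taken from this tree):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_LOAD_SHIFT 10     /* assumed; SCHED_LOAD_SCALE == 1024 */

    int main(void)
    {
        uint64_t walt_ravg_window = 20000000;   /* 20 ms window, in ns */
        uint64_t prev_runnable_sum = 10000000;  /* CPU was busy 10 ms of it */

        /* util = busy fraction of the window, scaled to 0..1024 */
        uint64_t util = (prev_runnable_sum << SCHED_LOAD_SHIFT) / walt_ravg_window;
        printf("util = %llu\n", (unsigned long long)util);  /* 512 */
        return 0;
    }

The only difference between the two helpers is which window statistic
feeds the formula: cumulative_runnable_avg (instantaneous demand) for
placement, prev_runnable_sum (executed time) for frequency.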
From 2d7da09705d6560c61676376be3ae05a5019600a Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 16 Jan 2017 18:09:20 -0800
Subject: sched: EAS: schedfreq: fix CPU util over estimation

WALT CPU utilization reports the CPU load of all scheduler classes.
Therefore, adding the RT class's load on top of it causes frequency
overshoot. Fix this by not accounting the RT class load when requesting
capacity.

Change-Id: I29600d7af7ca8c00e0d2ff1e13872024ccaa72bf
Signed-off-by: Joonwoo Park
---
 kernel/sched/cpufreq_sched.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
index f10d9f7d6d07..6ffb23adbcef 100644
--- a/kernel/sched/cpufreq_sched.c
+++ b/kernel/sched/cpufreq_sched.c
@@ -235,6 +235,18 @@ out:
 	cpufreq_cpu_put(policy);
 }
 
+#ifdef CONFIG_SCHED_WALT
+static inline unsigned long
+requested_capacity(struct sched_capacity_reqs *scr)
+{
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		return scr->cfs;
+	return scr->cfs + scr->rt;
+}
+#else
+#define requested_capacity(scr) (scr->cfs + scr->rt)
+#endif
+
 void update_cpu_capacity_request(int cpu, bool request)
 {
 	unsigned long new_capacity;
@@ -245,7 +257,7 @@ void update_cpu_capacity_request(int cpu, bool request)
 
 	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
 
-	new_capacity = scr->cfs + scr->rt;
+	new_capacity = requested_capacity(scr);
 	new_capacity = new_capacity * capacity_margin / SCHED_CAPACITY_SCALE;
 	new_capacity += scr->dl;
--
cgit v1.2.3

From c8b8c92bbc8954ae18cba7eb0c1f3a06a244129c Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 20 Jan 2017 11:10:15 -0800
Subject: sched: WALT: fix potential overflow

Task demand and CPU util are u64 values, so shifting them left by
SCHED_LOAD_SHIFT before dividing can overflow. Divide by the pre-shifted
window size (walt_ravg_window >> SCHED_LOAD_SHIFT) instead.

Change-Id: If7ec1623e723026d3346201122aab0303a6d2ba2
Signed-off-by: Joonwoo Park
---
 kernel/sched/sched.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 73077f535e95..d4613c5be81d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1591,10 +1591,9 @@ static inline unsigned long __cpu_util(int cpu, int delta)
 	unsigned long capacity = capacity_orig_of(cpu);
 
 #ifdef CONFIG_SCHED_WALT
-	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->cumulative_runnable_avg << SCHED_LOAD_SHIFT;
-		do_div(util, walt_ravg_window);
-	}
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		util = div64_u64(cpu_rq(cpu)->cumulative_runnable_avg,
+				walt_ravg_window >> SCHED_LOAD_SHIFT);
 #endif
 	delta += util;
 	if (delta < 0)
@@ -1614,10 +1613,9 @@ static inline unsigned long cpu_util_freq(int cpu)
 	unsigned long capacity = capacity_orig_of(cpu);
 
 #ifdef CONFIG_SCHED_WALT
-	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
-		do_div(util, walt_ravg_window);
-	}
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		util = div64_u64(cpu_rq(cpu)->prev_runnable_sum,
+				walt_ravg_window >> SCHED_LOAD_SHIFT);
 #endif
 	return (util >= capacity) ? capacity : util;
 }
--
cgit v1.2.3
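The overflow fixed above can be reproduced directly: a u64 sum shifted
left by SCHED_LOAD_SHIFT can wrap before the division, while dividing by
the pre-shifted window cannot. A sketch (the sum is exaggerated purely to
force the wrap):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_LOAD_SHIFT 10

    int main(void)
    {
        uint64_t window = 20000000;         /* 20 ms in ns */
        uint64_t sum = UINT64_MAX / 512;    /* exaggerated to force a wrap */

        uint64_t shifted = (sum << SCHED_LOAD_SHIFT) / window;  /* wrapped */
        uint64_t divided = sum / (window >> SCHED_LOAD_SHIFT);  /* no wrap */

        printf("%llu vs %llu\n", (unsigned long long)shifted,
               (unsigned long long)divided);
        return 0;
    }

Pre-shifting the divisor trades a little precision in the window length
for immunity to intermediate overflow, which is the trade the patch makes.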
From f94958ffa75d4f18d6d6838629d71edee41103c5 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Wed, 25 Jan 2017 17:07:19 -0800
Subject: cpufreq: sched: WALT: don't apply capacity margin twice

With WALT, all scheduler classes' load is accounted in scr->cfs, and
update_cpu_capacity_request() adds the capacity margin. At present the
scheduler also adds the capacity margin in the tick path, so the margin
is applied twice. Fix this by using the margin-applied CPU utilization
only to check whether a frequency increase is needed.

Change-Id: Id7d8cc73b2e4eec70b274ca66e09bb0b16bf6f09
Signed-off-by: Joonwoo Park
(trivial rebase conflict)
Signed-off-by: Chris Redpath
---
 kernel/sched/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4f97de8e0b18..0b14b4634b8c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2999,13 +2999,13 @@ static void sched_freq_tick_walt(int cpu)
 		return sched_freq_tick_pelt(cpu);
 
 	/*
-	 * Add a margin to the WALT utilization.
+	 * Add a margin to the WALT utilization to check if we will need to
+	 * increase frequency.
 	 * NOTE: WALT tracks a single CPU signal for all the scheduling
 	 * classes, thus this margin is going to be added to the DL class as
 	 * well, which is something we do not do in sched_freq_tick_pelt case.
 	 */
-	cpu_utilization = add_capacity_margin(cpu_utilization);
-	if (cpu_utilization <= capacity_curr)
+	if (add_capacity_margin(cpu_utilization) <= capacity_curr)
 		return;
 
 	/*
--
cgit v1.2.3

From 94e5c965075b55a5dfd1c4cce580e2dfb0c7ffc3 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Wed, 25 Jan 2017 17:45:56 -0800
Subject: sched: EAS/WALT: take into account of waking task's load

WALT's cpu_util(cpu) reports the CPU's load without taking the waking
task's load into account. Thus cpu_overutilized() currently
underestimates the load on the waking task's previous CPU.

Take the task's load into account when determining whether the previous
CPU is overutilized, so that we can bail out early without running the
expensive energy_diff().

Change-Id: I30f146984a880ad2cc1b8a4ce35bd239a8c9a607
Signed-off-by: Joonwoo Park
(minor rebase conflicts)
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3641dad3d4cc..94bb5786d8a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4656,6 +4656,7 @@ static inline void hrtick_update(struct rq *rq)
 #endif
 
 #ifdef CONFIG_SMP
+static bool __cpu_overutilized(int cpu, int delta);
 static bool cpu_overutilized(int cpu);
 unsigned long boosted_cpu_util(int cpu);
 #else
@@ -5856,9 +5857,14 @@ static inline bool task_fits_max(struct task_struct *p, int cpu)
 	return __task_fits(p, cpu, 0);
 }
 
+static bool __cpu_overutilized(int cpu, int delta)
+{
+	return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin);
+}
+
 static bool cpu_overutilized(int cpu)
 {
-	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+	return __cpu_overutilized(cpu, 0);
 }
 
 #ifdef CONFIG_SCHED_TUNE
@@ -6577,6 +6583,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
 	}
 
 	if (target_cpu != prev_cpu) {
+		int delta = 0;
 		struct energy_env eenv = {
 			.util_delta = task_util(p),
 			.src_cpu = prev_cpu,
 			.dst_cpu = target_cpu,
 			.task = p,
 		};
 
+#ifdef CONFIG_SCHED_WALT
+		if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+			delta = task_util(p);
+#endif
 		/* Not enough spare capacity on previous cpu */
-		if (cpu_overutilized(prev_cpu)) {
+		if (__cpu_overutilized(prev_cpu, delta)) {
 			schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap);
 			schedstat_inc(this_rq(), eas_stats.secb_insuff_cap);
 			goto unlock;
--
cgit v1.2.3
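The check __cpu_overutilized() performs above is a fixed-point comparison;
with an assumed capacity_margin of 1280 (roughly a 25% headroom, used here
only for illustration), the waking task's demand can tip a previously-fine
CPU over the threshold:

    #include <stdbool.h>
    #include <stdio.h>

    static const unsigned long capacity_margin = 1280;  /* assumed, ~1.25x */

    static bool cpu_overutilized(unsigned long capacity, unsigned long util,
                                 unsigned long delta)
    {
        /* same shape as the kernel check: cap * 1024 < (util + delta) * margin */
        return (capacity * 1024) < ((util + delta) * capacity_margin);
    }

    int main(void)
    {
        /* capacity 1024, util 700: fine on its own... */
        printf("%d\n", cpu_overutilized(1024, 700, 0));    /* 0 */
        /* ...but overutilized once a waking task of demand 200 is counted */
        printf("%d\n", cpu_overutilized(1024, 700, 200));  /* 1 */
        return 0;
    }

This is exactly the case the patch targets: without delta, the second call
would also return 0 and the expensive energy_diff() would run anyway.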
From 11b618a0b2fc8598474a518add7037d4005d83e8 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 24 Jul 2017 13:23:05 +0100
Subject: sched: EAS: fix incorrect energy delta calculation due to rounding
 error

To calculate the energy difference, we currently iterate over the CPUs
under the same sched domain to accumulate the total energy cost and then
compare before and after:

  for_each_domain(cpu)
	total_energy_before += (cpu_util * power) >> SCHED_CAPACITY_SHIFT;

  for_each_domain(cpu)
	total_energy_after += (cpu_util * power) >> SCHED_CAPACITY_SHIFT;

Doing so can incorrectly calculate and report abs(delta) > 0 when there
is actually no energy delta between before and after, because the same
accumulated total cpu_util of all the CPUs can be distributed differently
before and after, which causes different amounts of rounding error. Fix
this incorrectness by shifting just once, on the accumulated
total_energy.

Change-Id: I82f1e2e358367058960938b4ef81714f57e921cf
Signed-off-by: Joonwoo Park
(moved part to another commit)
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94bb5786d8a7..61c8952c7aaf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,10 +5473,8 @@ end:
  */
 static int sched_group_energy(struct energy_env *eenv)
 {
-	struct sched_domain *sd;
-	int cpu, total_energy = 0;
 	struct cpumask visit_cpus;
-	struct sched_group *sg;
+	u64 total_energy = 0;
 
 	WARN_ON(!eenv->sg_top->sge);
 
@@ -5484,8 +5482,8 @@ static int sched_group_energy(struct energy_env *eenv)
 	while (!cpumask_empty(&visit_cpus)) {
 		struct sched_group *sg_shared_cap = NULL;
-
-		cpu = cpumask_first(&visit_cpus);
+		int cpu = cpumask_first(&visit_cpus);
+		struct sched_domain *sd;
 
 		/*
 		 * Is the group utilization affected by cpus outside this
@@ -5497,7 +5495,7 @@ static int sched_group_energy(struct energy_env *eenv)
 			sg_shared_cap = sd->parent->groups;
 
 		for_each_domain(cpu, sd) {
-			sg = sd->groups;
+			struct sched_group *sg = sd->groups;
 
 			/* Has this sched_domain already been visited? */
 			if (sd->child && group_first_cpu(sg) != cpu)
@@ -5533,11 +5531,9 @@ static int sched_group_energy(struct energy_env *eenv)
 				idle_idx = group_idle_state(eenv, sg);
 				group_util = group_norm_util(eenv, sg);
 
-				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
-						>> SCHED_CAPACITY_SHIFT;
+				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power);
 				sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
-						* sg->sge->idle_states[idle_idx].power)
-						>> SCHED_CAPACITY_SHIFT;
+						* sg->sge->idle_states[idle_idx].power);
 
 				total_energy += sg_busy_energy + sg_idle_energy;
 
@@ -5562,7 +5558,7 @@ next_cpu:
 			continue;
 		}
 
-		eenv->energy = total_energy;
+		eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;
 		return 0;
 	}
--
cgit v1.2.3
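The rounding error described above reduces to simple integer arithmetic:
shifting each group separately can drop almost one unit per group, so the
same total distributed differently yields different results, while
shifting the accumulated total once does not. A minimal demonstration
(the util values are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    int main(void)
    {
        /* the same total (1500 units of util * power) split two ways */
        uint64_t split_a = (1023 >> SCHED_CAPACITY_SHIFT)
                         + (477 >> SCHED_CAPACITY_SHIFT);   /* 0 + 0 = 0 */
        uint64_t split_b = (1024 >> SCHED_CAPACITY_SHIFT)
                         + (476 >> SCHED_CAPACITY_SHIFT);   /* 1 + 0 = 1 */
        uint64_t once = (1023 + 477) >> SCHED_CAPACITY_SHIFT; /* always 1 */

        /* a spurious delta of 1 between equal totals, versus none */
        printf("%llu %llu %llu\n", (unsigned long long)split_a,
               (unsigned long long)split_b, (unsigned long long)once);
        return 0;
    }

Widening total_energy to u64 in the patch is what makes the deferred
single shift safe: the unshifted products no longer risk overflowing int.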
From 3989a247e2744f437233a36a0183edf3b3dfc8f1 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Tue, 7 Mar 2017 18:08:24 -0800
Subject: sched: EAS: kill incorrect nohz idle cpu kick

EAS doesn't allow the NOHZ idle balancer until a CPU is overutilized.
However, nohz_kick_needed() can still return true, which wakes up an
idle CPU for nothing.

Change-Id: I6e548442e29e4f85cda695e4c7101dd591b12fe6
Signed-off-by: Joonwoo Park
---
 kernel/sched/fair.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61c8952c7aaf..0f43ece69e8c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9768,12 +9768,12 @@ static inline bool nohz_kick_needed(struct rq *rq)
 		return true;
 
 	/* Do idle load balance if there have misfit task */
-	if (energy_aware() && rq->misfit_task)
-		return true;
+	if (energy_aware())
+		return rq->misfit_task;
 
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
-	if (sd && !energy_aware()) {
+	if (sd) {
 		sgc = sd->groups->sgc;
 		nr_busy = atomic_read(&sgc->nr_busy_cpus);
--
cgit v1.2.3

From 0caf1df0c520325c9352149795266709e989b222 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Tue, 16 May 2017 11:13:00 -0700
Subject: sched: WALT: fix window mis-alignment

The initial window start needs to be close to ktime ns = 0 so that it is
aligned with the scheduler tick.

Change-Id: Ia91f74efce2f910106622a054a6fcd507e763ca5
Signed-off-by: Joonwoo Park
---
 kernel/sched/walt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 166641ed1f39..28e999554463 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -804,11 +804,11 @@ void walt_set_window_start(struct rq *rq)
 	int cpu = cpu_of(rq);
 	struct rq *sync_rq = cpu_rq(sync_cpu);
 
-	if (rq->window_start)
+	if (likely(rq->window_start))
 		return;
 
 	if (cpu == sync_cpu) {
-		rq->window_start = walt_ktime_clock();
+		rq->window_start = 1;
 	} else {
 		raw_spin_unlock(&rq->lock);
 		double_rq_lock(rq, sync_rq);
--
cgit v1.2.3

From 2f3e97a814c8f906d3334c85271aacb7a8783490 Mon Sep 17 00:00:00 2001
From: Florian Meier
Date: Thu, 14 Jul 2016 12:07:26 -0700
Subject: gcov: add support for gcc version >= 6

commit d02038f972538b93011d78c068f44514fbde0a8c upstream.

Link: http://lkml.kernel.org/r/20160701130914.GA23225@styxhp
Signed-off-by: Florian Meier
Reviewed-by: Peter Oberparleiter
Tested-by: Peter Oberparleiter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Signed-off-by: Greg Kroah-Hartman
---
 kernel/gcov/gcc_4_7.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index e25e92fb44fa..6a5c239c7669 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,7 @@
 #include
 #include "gcov.h"
 
-#if __GNUC__ == 5 && __GNUC_MINOR__ >= 1
+#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
--
cgit v1.2.3
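The compiler-version ladder the gcov patches maintain can be checked
outside the kernel. This sketch mirrors the cascade in
kernel/gcov/gcc_4_7.c as it stands after the gcc >= 6 patch above; the
#else fallback of 8 is the file's pre-existing value, which is not shown
in the hunk and is assumed here. The next commit prepends a GCC 7 branch
that drops the count back to 9:

    #include <stdio.h>

    /* Same cascade as kernel/gcov/gcc_4_7.c after the patch above. */
    #if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
    #define GCOV_COUNTERS 10
    #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
    #define GCOV_COUNTERS 9
    #else
    #define GCOV_COUNTERS 8
    #endif

    int main(void)
    {
        printf("gcc %d.%d -> GCOV_COUNTERS = %d\n",
               __GNUC__, __GNUC_MINOR__, GCOV_COUNTERS);
        return 0;
    }

Compiling this with different GCC versions shows which counter layout the
kernel's gcov core would assume for objects built by that compiler.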
From d255fffdb532caf103369a5894be942e528c3151 Mon Sep 17 00:00:00 2001
From: Martin Liska
Date: Fri, 12 May 2017 15:46:35 -0700
Subject: gcov: support GCC 7.1

commit 05384213436ab690c46d9dfec706b80ef8d671ab upstream.

Starting from GCC 7.1, __gcov_exit is a new symbol expected to be
implemented in a profiling runtime.

[akpm@linux-foundation.org: coding-style fixes]
[mliska@suse.cz: v2]
Link: http://lkml.kernel.org/r/e63a3c59-0149-c97e-4084-20ca8f146b26@suse.cz
Link: http://lkml.kernel.org/r/8c4084fa-3885-29fe-5fc4-0d4ca199c785@suse.cz
Signed-off-by: Martin Liska
Acked-by: Peter Oberparleiter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Signed-off-by: Greg Kroah-Hartman
---
 kernel/gcov/base.c    | 6 ++++++
 kernel/gcov/gcc_4_7.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 7080ae1eb6c1..f850e906564b 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 6a5c239c7669..46a18e72bce6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
 #include
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS			9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
--
cgit v1.2.3

From 3482bbea6b3e9ed685ce42a77927dfeaec0be122 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Tue, 11 Apr 2017 00:20:41 +0200
Subject: BACKPORT: cpufreq: schedutil: Use policy-dependent transition delays

Make the schedutil governor take the initial (default) value of the
rate_limit_us sysfs attribute from the (new) transition_delay_us policy
parameter (to be set by the scaling driver). That will allow scaling
drivers to make schedutil use smaller default values of rate_limit_us
and reduce the default average time interval between consecutive
frequency changes.

Make intel_pstate set transition_delay_us to 500.

BACKPORT: Modified to support the separate up_rate_limit_us and
down_rate_limit_us (upstream just has a single rate_limit_us). Also
dropped the changes for intel_pstate as there's a merge conflict.

Change-Id: I62a8543879a4d8582cdcb31ebd55607705d1c8b1
Signed-off-by: Rafael J. Wysocki
Acked-by: Viresh Kumar
(cherry picked from commit 1b72e7fd304639f1cd49d1e11955c4974936d88c)
Signed-off-by: Brendan Jackman
---
 kernel/sched/cpufreq_schedutil.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a0bcf488fb81..b90f7434e13b 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -615,7 +615,6 @@ static int sugov_init(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy;
 	struct sugov_tunables *tunables;
-	unsigned int lat;
 	int ret = 0;
 
 	/* State should be equivalent to EXIT */
@@ -654,12 +653,19 @@ static int sugov_init(struct cpufreq_policy *policy)
 		goto stop_kthread;
 	}
 
-	tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
-	tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
-	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
-	if (lat) {
-		tunables->up_rate_limit_us *= lat;
-		tunables->down_rate_limit_us *= lat;
+	if (policy->up_transition_delay_us && policy->down_transition_delay_us) {
+		tunables->up_rate_limit_us = policy->up_transition_delay_us;
+		tunables->down_rate_limit_us = policy->down_transition_delay_us;
+	} else {
+		unsigned int lat;
+
+		tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
+		tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
+		lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+		if (lat) {
+			tunables->up_rate_limit_us *= lat;
+			tunables->down_rate_limit_us *= lat;
+		}
 	}
 
 	policy->governor_data = sg_policy;
--
cgit v1.2.3

From ed48d9230e303aaff037d60fd866088c114184e5 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Thu, 24 Aug 2017 12:04:29 -0400
Subject: cpuset: Fix incorrect memory_pressure control file mapping

commit 1c08c22c874ac88799cab1f78c40f46110274915 upstream.

The memory_pressure control file was incorrectly set up without a
private value (0, by default). As a result, this control file was
treated like memory_migrate on read. By adding back the
FILE_MEMORY_PRESSURE private value, the correct memory pressure value
will be returned.

Signed-off-by: Waiman Long
Signed-off-by: Tejun Heo
Fixes: 7dbdb199d3bf ("cgroup: replace cftype->mode with CFTYPE_WORLD_WRITABLE")
Signed-off-by: Greg Kroah-Hartman
---
 kernel/cpuset.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ccd66a97c8b..2924b6faa469 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1910,6 +1910,7 @@ static struct cftype files[] = {
 	{
 		.name = "memory_pressure",
 		.read_u64 = cpuset_read_u64,
+		.private = FILE_MEMORY_PRESSURE,
 	},
 
 	{
--
cgit v1.2.3
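The class of bug fixed in the cpuset patch above is worth spelling out:
when several control files share one read callback that dispatches on
cft->private, an entry that omits .private silently gets 0 and is served
as whichever file maps to 0. A reduced, self-contained sketch
(hypothetical names and values, not the actual cgroup structures):

    #include <stdio.h>

    enum { FILE_MEMORY_MIGRATE, FILE_MEMORY_PRESSURE };   /* 0 and 1 */

    struct cftype {
        const char *name;
        int private;    /* selects behaviour in the shared callback */
    };

    static unsigned long read_u64(const struct cftype *cft)
    {
        switch (cft->private) {
        case FILE_MEMORY_MIGRATE:   return 11;  /* stand-in values */
        case FILE_MEMORY_PRESSURE:  return 22;
        default:                    return 0;
        }
    }

    int main(void)
    {
        /* .private left out: defaults to 0 == FILE_MEMORY_MIGRATE */
        struct cftype broken = { .name = "memory_pressure" };
        struct cftype fixed  = { .name = "memory_pressure",
                                 .private = FILE_MEMORY_PRESSURE };

        printf("broken reads %lu, fixed reads %lu\n",
               read_u64(&broken), read_u64(&fixed));    /* 11 vs 22 */
        return 0;
    }

The one-line kernel fix is exactly the second initializer: restoring the
explicit .private value so the shared callback dispatches correctly.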