author     Davide Garberi <dade.garberi@gmail.com>    2023-08-06 15:31:30 +0200
committer  Davide Garberi <dade.garberi@gmail.com>    2023-08-06 15:31:30 +0200
commit     cc57cb4ee3b7918b74d30604735d353b9a5fa23b (patch)
tree       0be483b86472eaf1c74f747ecbaf6300f3998a1a
parent     44be99a74546fb018cbf2049602a5fd2889a0089 (diff)
parent     7d11b1a7a11c598a07687f853ded9eca97d89043 (diff)
Merge lineage-20 of git@github.com:LineageOS/android_kernel_qcom_msm8998.git into lineage-20
7d11b1a7a11c Revert "sched: cpufreq: Use sched_clock instead of rq_clock when updating schedutil"
daaa5da96a74 sched: Take irq_sparse lock during the isolation
217ab2d0ef91 rcu: Speed up calling of RCU tasks callbacks
997b726bc092 kernel: power: Workaround for sensor ipc message causing high power consume
b933e4d37bc0 sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices
82d3f23d6dc5 sched/fair: Fix bandwidth timer clock drift condition
629bfed360f9 kernel: power: qos: remove check for core isolation while cluster LPMs
891a63210e1d sched/fair: Fix issue where frequency update not skipped
b775cb29f663 ANDROID: Move schedtune en/dequeue before schedutil update triggers
ebdb82f7b34a sched/fair: Skip frequency updates if CPU about to idle
ff383d94478a FROMLIST: sched: Make iowait_boost optional in schedutil
9539942cb065 FROMLIST: cpufreq: Make iowait boost a policy option
b65c91c9aa14 ARM: dts: msm: add HW CPU's busy-cost-data for additional freqs
72f13941085b ARM: dts: msm: fix CPU's idle-cost-data
ab88411382f7 ARM: dts: msm: fix EM to be monotonically increasing
83dcbae14782 ARM: dts: msm: Fix EAS idle-cost-data property length
33d3b17bfdfb ARM: dts: msm: Add msm8998 energy model
c0fa7577022c sched/walt: Re-add code to allow WALT to function
d5cd35f38616 FROMGIT: binder: use EINTR for interrupted wait for work
db74739c86de sched: Don't fail isolation request for an already isolated CPU
aee7a16e347b sched: WALT: increase WALT minimum window size to 20ms
4dbe44554792 sched: cpufreq: Use per_cpu_ptr instead of this_cpu_ptr when reporting load
ef3fb04c7df4 sched: cpufreq: Use sched_clock instead of rq_clock when updating schedutil
c7128748614a sched/cpupri: Exclude isolated CPUs from the lowest_mask
6adb092856e8 sched: cpufreq: Limit governor updates to WALT changes alone
0fa652ee00f5 sched: walt: Correct WALT window size initialization
41cbb7bc59fb sched: walt: fix window misalignment when HZ=300
43cbf9d6153d sched/tune: Increase the cgroup limit to 6
c71b8fffe6b3 drivers: cpuidle: lpm-levels: Fix KW issues with idle state idx < 0
938e42ca699f drivers: cpuidle: lpm-levels: Correctly check for list empty
8d8a48aecde5 sched/fair: Fix load_balance() affinity redo path
eccc8acbe705 sched/fair: Avoid unnecessary active load balance
0ffdb886996b BACKPORT: sched/core: Fix rules for running on online && !active CPUs
c9999f04236e sched/core: Allow kthreads to fall back to online && !active cpus
b9b6bc6ea3c0 sched: Allow migrating kthreads into online but inactive CPUs
a9314f9d8ad4 sched/fair: Allow load bigger task load balance when nr_running is 2
c0b317c27d44 pinctrl: qcom: Clear status bit on irq_unmask
45df1516d04a UPSTREAM: mm: fix misplaced unlock_page in do_wp_page()
899def5edcd4 UPSTREAM: mm/ksm: Remove reuse_ksm_page()
46c6fbdd185a BACKPORT: mm: do_wp_page() simplification
90dccbae4c04 UPSTREAM: mm: reuse only-pte-mapped KSM page in do_wp_page()
ebf270d24640 sched/fair: vruntime should normalize when switching from fair
cbe0b37059c9 mm: introduce arg_lock to protect arg_start|end and env_start|end in mm_struct
12d40f1995b4 msm: mdss: Fix indentation
620df03a7229 msm: mdss: Treat polling_en as the bool that it is
12af218146a6 msm: mdss: add idle state node
13e661759656 cpuset: Restore tasks affinity while moving across cpusets
602bf4096dab genirq: Honour IRQ's affinity hint during migration
9209b5556f6a power: qos: Use effective affinity mask
f31078b5825f genirq: Introduce effective affinity mask
58c453484f7e sched/cputime: Mitigate performance regression in times()/clock_gettime()
400383059868 kernel: time: Add delay after cpu_relax() in tight loops
1daa7ea39076 pinctrl: qcom: Update irq handle for GPIO pins
07f7c9961c7c power: smb-lib: Fix mutex acquisition deadlock on PD hard reset
094b738f46c8 power: qpnp-smb2: Implement battery charging_enabled node
d6038d6da57f ASoC: msm-pcm-q6-v2: Add dsp buf check
0d7a6c301af8 qcacld-3.0: Fix OOB in wma_scan_roam.c

Change-Id: Ia2e189e37daad6e99bdb359d1204d9133a7916f4
-rw-r--r--  Documentation/scheduler/sched-bwc.txt | 45
-rw-r--r--  arch/arm/boot/dts/qcom/msm8998.dtsi | 144
-rw-r--r--  drivers/android/binder.c | 4
-rw-r--r--  drivers/cpufreq/intel_pstate.c | 1
-rw-r--r--  drivers/cpuidle/lpm-levels.c | 17
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm.c | 9
-rw-r--r--  drivers/power/supply/qcom/qpnp-smb2.c | 8
-rw-r--r--  drivers/power/supply/qcom/smb-lib.c | 24
-rw-r--r--  drivers/soc/qcom/smp2p_sleepstate.c | 5
-rw-r--r--  drivers/video/fbdev/msm/mdss_mdp_intf_video.c | 2
-rw-r--r--  drivers/video/fbdev/msm/msm_mdss_io_8974.c | 20
-rw-r--r--  fs/proc/base.c | 8
-rw-r--r--  include/linux/cpufreq.h | 3
-rw-r--r--  include/linux/init_task.h | 1
-rw-r--r--  include/linux/irq.h | 29
-rw-r--r--  include/linux/mm_types.h | 2
-rw-r--r--  include/linux/sched.h | 11
-rw-r--r--  include/linux/sched/sysctl.h | 6
-rw-r--r--  include/linux/time.h | 1
-rw-r--r--  include/trace/events/sched.h | 7
-rw-r--r--  kernel/cpuset.c | 18
-rw-r--r--  kernel/fork.c | 1
-rw-r--r--  kernel/irq/Kconfig | 4
-rw-r--r--  kernel/irq/cpuhotplug.c | 8
-rw-r--r--  kernel/irq/irqdesc.c | 14
-rw-r--r--  kernel/irq/proc.c | 90
-rw-r--r--  kernel/power/qos.c | 30
-rw-r--r--  kernel/power/suspend.c | 6
-rw-r--r--  kernel/rcu/update.c | 17
-rw-r--r--  kernel/sched/Makefile | 1
-rw-r--r--  kernel/sched/core.c | 100
-rw-r--r--  kernel/sched/cpufreq_schedutil.c | 32
-rw-r--r--  kernel/sched/cpupri.c | 2
-rw-r--r--  kernel/sched/cputime.c | 6
-rw-r--r--  kernel/sched/fair.c | 214
-rw-r--r--  kernel/sched/hmp.c | 32
-rw-r--r--  kernel/sched/rt.c | 11
-rw-r--r--  kernel/sched/sched.h | 17
-rw-r--r--  kernel/sched/stop_task.c | 3
-rw-r--r--  kernel/sched/tune.c | 4
-rw-r--r--  kernel/sched/walt.c | 5
-rw-r--r--  kernel/sched/walt.h | 2
-rw-r--r--  kernel/sys.c | 10
-rw-r--r--  kernel/sysctl.c | 30
-rw-r--r--  kernel/time/alarmtimer.c | 2
-rw-r--r--  kernel/time/hrtimer.c | 3
-rw-r--r--  kernel/time/timer.c | 2
-rw-r--r--  mm/init-mm.c | 1
-rw-r--r--  mm/ksm.c | 5
-rw-r--r--  mm/memory.c | 42
-rw-r--r--  sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c | 9
51 files changed, 842 insertions, 226 deletions
diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt
index f6b1873f68ab..de583fbbfe42 100644
--- a/Documentation/scheduler/sched-bwc.txt
+++ b/Documentation/scheduler/sched-bwc.txt
@@ -90,6 +90,51 @@ There are two ways in which a group may become throttled:
In case b) above, even though the child may have runtime remaining it will not
be allowed to until the parent's runtime is refreshed.
+CFS Bandwidth Quota Caveats
+---------------------------
+Once a slice is assigned to a cpu it does not expire. However all but 1ms of
+the slice may be returned to the global pool if all threads on that cpu become
+unrunnable. This is configured at compile time by the min_cfs_rq_runtime
+variable. This is a performance tweak that helps prevent added contention on
+the global lock.
+
+The fact that cpu-local slices do not expire results in some interesting corner
+cases that should be understood.
+
+For cgroup cpu constrained applications that are cpu limited this is a
+relatively moot point because they will naturally consume the entirety of their
+quota as well as the entirety of each cpu-local slice in each period. As a
+result it is expected that nr_periods roughly equal nr_throttled, and that
+cpuacct.usage will increase roughly equal to cfs_quota_us in each period.
+
+For highly-threaded, non-cpu bound applications this non-expiration nuance
+allows applications to briefly burst past their quota limits by the amount of
+unused slice on each cpu that the task group is running on (typically at most
+1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only
+applies if quota had been assigned to a cpu and then not fully used or returned
+in previous periods. This burst amount will not be transferred between cores.
+As a result, this mechanism still strictly limits the task group to quota
+average usage, albeit over a longer time window than a single period. This
+also limits the burst ability to no more than 1ms per cpu. This provides
+a better, more predictable user experience for highly threaded applications with
+small quota limits on high core count machines. It also eliminates the
+propensity to throttle these applications while simultaneously using less than
+quota amounts of cpu. Another way to say this, is that by allowing the unused
+portion of a slice to remain valid across periods we have decreased the
+possibility of wastefully expiring quota on cpu-local silos that don't need a
+full slice's amount of cpu time.
+
+The interaction between cpu-bound and non-cpu-bound-interactive applications
+should also be considered, especially when single core usage hits 100%. If you
+gave each of these applications half of a cpu-core and they both got scheduled
+on the same CPU it is theoretically possible that the non-cpu bound application
+will use up to 1ms additional quota in some periods, thereby preventing the
+cpu-bound application from fully using its quota by that same amount. In these
+instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to
+decide which application is chosen to run, as they will both be runnable and
+have remaining quota. This runtime discrepancy will be made up in the following
+periods when the interactive application idles.
+
Examples
--------
1. Limit a group to 1 CPU worth of runtime.
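
The caveats added to sched-bwc.txt above mean a group can momentarily exceed its quota by at most min_cfs_rq_runtime (1ms) of leftover, non-expiring slice per CPU. A minimal userspace sketch, assuming a cgroup-v1 cpu controller mounted at /sys/fs/cgroup/cpu and a hypothetical "myapp" group, that configures a 0.5-CPU quota and prints the worst-case burst implied by the text above:

    /*
     * Sketch only: configure CFS bandwidth for a hypothetical cgroup and
     * estimate the worst-case burst allowed by non-expiring cpu-local
     * slices.  The 1ms value mirrors min_cfs_rq_runtime as described in
     * the documentation text above.
     */
    #include <stdio.h>

    #define MIN_CFS_RQ_RUNTIME_US 1000  /* 1ms retained per CPU */

    static int write_val(const char *path, long val)
    {
        FILE *f = fopen(path, "w");

        if (!f)
            return -1;
        fprintf(f, "%ld\n", val);
        return fclose(f);
    }

    int main(void)
    {
        const char *cg = "/sys/fs/cgroup/cpu/myapp";  /* hypothetical group */
        long quota_us = 50000, period_us = 100000;    /* 0.5 CPU on average */
        int ncpus = 8;
        char path[256];

        snprintf(path, sizeof(path), "%s/cpu.cfs_period_us", cg);
        write_val(path, period_us);
        snprintf(path, sizeof(path), "%s/cpu.cfs_quota_us", cg);
        write_val(path, quota_us);

        /* Worst case: up to 1ms of stale slice may be consumed per CPU. */
        printf("max burst per period: %d us\n", ncpus * MIN_CFS_RQ_RUNTIME_US);
        return 0;
    }
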
diff --git a/arch/arm/boot/dts/qcom/msm8998.dtsi b/arch/arm/boot/dts/qcom/msm8998.dtsi
index ae664e48afff..b9a38ddc5ba8 100644
--- a/arch/arm/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8998.dtsi
@@ -49,6 +49,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea0>;
L2_0: l2-cache {
@@ -77,6 +78,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea1>;
L1_I_1: l1-icache {
@@ -100,6 +102,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea2>;
L1_I_2: l1-icache {
@@ -123,6 +126,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea3>;
L1_I_3: l1-icache {
@@ -146,6 +150,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea4>;
L2_1: l2-cache {
@@ -173,6 +178,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea5>;
L1_I_101: l1-icache {
@@ -196,6 +202,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea6>;
L1_I_102: l1-icache {
@@ -219,6 +226,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea7>;
L1_I_103: l1-icache {
@@ -271,6 +279,142 @@
};
};
};
+ energy-costs {
+ CPU_COST_0: core-cost0 {
+ busy-cost-data = <
+ 65 11
+ 80 20
+ 96 25
+ 113 30
+ 130 33
+ 147 35
+ 164 36
+ 181 42
+ 194 47
+ 211 54
+ 228 62
+ 243 67
+ 258 73
+ 275 79
+ 292 88
+ 308 95
+ 326 104
+ 342 111
+ 368 134
+ 384 155
+ 401 178
+ 419 201
+ >;
+ idle-cost-data = <
+ 4 4 0 0
+ >;
+ };
+ CPU_COST_1: core-cost1 {
+ busy-cost-data = <
+ 129 56
+ 148 76
+ 182 91
+ 216 105
+ 247 118
+ 278 135
+ 312 150
+ 344 162
+ 391 181
+ 419 196
+ 453 214
+ 487 229
+ 509 248
+ 546 280
+ 581 316
+ 615 354
+ 650 392
+ 676 439
+ 712 495
+ 739 565
+ 776 622
+ 803 691
+ 834 792
+ 881 889
+ 914 1059
+ 957 1244
+ 975 1375
+ 996 1549
+ 1016 1617
+ 1021 1677
+ 1024 1683
+ >;
+ idle-cost-data = <
+ 10 10 0 0
+ >;
+ };
+ CLUSTER_COST_0: cluster-cost0 {
+ busy-cost-data = <
+ 65 17
+ 80 18
+ 96 18
+ 113 20
+ 130 21
+ 147 22
+ 164 23
+ 181 24
+ 194 27
+ 211 29
+ 228 30
+ 243 32
+ 258 33
+ 275 35
+ 292 38
+ 308 39
+ 326 42
+ 342 46
+ 368 48
+ 384 53
+ 401 59
+ 419 66
+ >;
+ idle-cost-data = <
+ 31 31 31 0
+ >;
+ };
+ CLUSTER_COST_1: cluster-cost1 {
+ busy-cost-data = <
+ 129 24
+ 148 25
+ 182 26
+ 216 29
+ 247 30
+ 278 33
+ 312 35
+ 344 37
+ 391 38
+ 419 40
+ 453 43
+ 487 44
+ 509 46
+ 546 50
+ 581 54
+ 615 60
+ 650 63
+ 676 70
+ 712 74
+ 739 80
+ 776 87
+ 803 96
+ 834 104
+ 881 120
+ 914 130
+ 957 171
+ 975 178
+ 996 185
+ 1016 200
+ 1021 202
+ 1024 203
+ >;
+ idle-cost-data = <
+ 50 50 50 0
+ >;
+ };
+ };
};
soc: soc { };
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 7c584e2ea476..370f1452710f 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4062,7 +4062,7 @@ static int binder_wait_for_work(struct binder_thread *thread,
binder_inner_proc_lock(proc);
list_del_init(&thread->waiting_thread_node);
if (signal_pending(current)) {
- ret = -ERESTARTSYS;
+ ret = -EINTR;
break;
}
}
@@ -4991,7 +4991,7 @@ err:
if (thread)
thread->looper_need_return = false;
wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
- if (ret && ret != -ERESTARTSYS)
+ if (ret && ret != -EINTR)
pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
err_unlocked:
trace_binder_ioctl_done(ret);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15fcf2cac971..53226f33ea98 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1187,6 +1187,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+ policy->iowait_boost_enable = true;
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c
index 1eaef20e5ed5..dca59eadc6c2 100644
--- a/drivers/cpuidle/lpm-levels.c
+++ b/drivers/cpuidle/lpm-levels.c
@@ -187,7 +187,7 @@ static uint32_t least_cluster_latency(struct lpm_cluster *cluster,
uint32_t latency = 0;
int i;
- if (!cluster->list.next) {
+ if (list_empty(&cluster->list)) {
for (i = 0; i < cluster->nlevels; i++) {
level = &cluster->levels[i];
pwr_params = &level->pwr;
@@ -691,7 +691,7 @@ static void update_history(struct cpuidle_device *dev, int idx);
static int cpu_power_select(struct cpuidle_device *dev,
struct lpm_cpu *cpu)
{
- int best_level = -1;
+ int best_level = 0;
uint32_t latency_us = pm_qos_request_for_cpu(PM_QOS_CPU_DMA_LATENCY,
dev->cpu);
s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length());
@@ -705,8 +705,6 @@ static int cpu_power_select(struct cpuidle_device *dev,
uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu);
uint32_t *max_residency = get_per_cpu_max_residency(dev->cpu);
- if (!cpu)
- return -EINVAL;
if ((sleep_disabled && !cpu_isolated(dev->cpu)) || sleep_us < 0)
return 0;
@@ -1536,17 +1534,11 @@ static int lpm_cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct lpm_cluster *cluster = per_cpu(cpu_cluster, dev->cpu);
- int idx;
if (!cluster)
return 0;
- idx = cpu_power_select(dev, cluster->cpu);
-
- if (idx < 0)
- return -EPERM;
-
- return idx;
+ return cpu_power_select(dev, cluster->cpu);
}
static void update_history(struct cpuidle_device *dev, int idx)
@@ -1591,9 +1583,6 @@ static int lpm_cpuidle_enter(struct cpuidle_device *dev,
int64_t start_time = ktime_to_ns(ktime_get()), end_time;
struct power_params *pwr_params;
- if (idx < 0)
- return -EINVAL;
-
pwr_params = &cluster->cpu->levels[idx].pwr;
sched_set_cpu_cstate(smp_processor_id(), idx + 1,
pwr_params->energy_overhead, pwr_params->latency_us);
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 22496ad167a0..ee8c09717597 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -631,6 +631,7 @@ static void msm_gpio_irq_enable(struct irq_data *d)
static void msm_gpio_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ uint32_t irqtype = irqd_get_trigger_type(d);
struct msm_pinctrl *pctrl = to_msm_pinctrl(gc);
const struct msm_pingroup *g;
unsigned long flags;
@@ -640,6 +641,12 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
spin_lock_irqsave(&pctrl->lock, flags);
+ if (irqtype & (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW)) {
+ val = readl_relaxed(pctrl->regs + g->intr_status_reg);
+ val &= ~BIT(g->intr_status_bit);
+ writel_relaxed(val, pctrl->regs + g->intr_status_reg);
+ }
+
val = readl(pctrl->regs + g->intr_status_reg);
val &= ~BIT(g->intr_status_bit);
writel(val, pctrl->regs + g->intr_status_reg);
@@ -905,7 +912,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
ret = gpiochip_irqchip_add(chip,
&msm_gpio_irq_chip,
0,
- handle_edge_irq,
+ handle_fasteoi_irq,
IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "Failed to add irqchip to gpiochip\n");
diff --git a/drivers/power/supply/qcom/qpnp-smb2.c b/drivers/power/supply/qcom/qpnp-smb2.c
index 5fae7b99d88f..b0704af49353 100644
--- a/drivers/power/supply/qcom/qpnp-smb2.c
+++ b/drivers/power/supply/qcom/qpnp-smb2.c
@@ -917,6 +917,7 @@ static int smb2_init_dc_psy(struct smb2 *chip)
*************************/
static enum power_supply_property smb2_batt_props[] = {
+ POWER_SUPPLY_PROP_CHARGING_ENABLED,
POWER_SUPPLY_PROP_INPUT_SUSPEND,
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_HEALTH,
@@ -967,6 +968,9 @@ static int smb2_batt_get_prop(struct power_supply *psy,
case POWER_SUPPLY_PROP_PRESENT:
rc = smblib_get_prop_batt_present(chg, val);
break;
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
+ val->intval = !get_effective_result(chg->chg_disable_votable);
+ break;
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
rc = smblib_get_prop_input_suspend(chg, val);
break;
@@ -1079,6 +1083,9 @@ static int smb2_batt_set_prop(struct power_supply *psy,
struct smb_charger *chg = power_supply_get_drvdata(psy);
switch (prop) {
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
+ vote(chg->chg_disable_votable, USER_VOTER, !!!val->intval, 0);
+ break;
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
rc = smblib_set_prop_input_suspend(chg, val);
break;
@@ -1163,6 +1170,7 @@ static int smb2_batt_prop_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
switch (psp) {
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
case POWER_SUPPLY_PROP_SYSTEM_TEMP_LEVEL:
case POWER_SUPPLY_PROP_CAPACITY:
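
The new POWER_SUPPLY_PROP_CHARGING_ENABLED property above reads back the inverse of chg_disable_votable and, on writes, casts the USER_VOTER vote that suspends or resumes charging. A minimal sketch, assuming the property is exposed as /sys/class/power_supply/battery/charging_enabled (the path is not confirmed by this diff):

    /* Sketch: toggle battery charging through the new node (path assumed). */
    #include <stdio.h>

    static int set_charging(int enable)
    {
        FILE *f = fopen("/sys/class/power_supply/battery/charging_enabled", "w");

        if (!f)
            return -1;
        fprintf(f, "%d\n", enable ? 1 : 0);
        return fclose(f);
    }

    int main(void)
    {
        return set_charging(0);  /* 0 votes charging off; pass 1 to resume */
    }
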
diff --git a/drivers/power/supply/qcom/smb-lib.c b/drivers/power/supply/qcom/smb-lib.c
index 81623c65ea8e..1782f23fafa7 100644
--- a/drivers/power/supply/qcom/smb-lib.c
+++ b/drivers/power/supply/qcom/smb-lib.c
@@ -4457,6 +4457,7 @@ static void rdstd_cc2_detach_work(struct work_struct *work)
{
int rc;
u8 stat4, stat5;
+ bool lock = false;
struct smb_charger *chg = container_of(work, struct smb_charger,
rdstd_cc2_detach_work);
@@ -4519,9 +4520,28 @@ static void rdstd_cc2_detach_work(struct work_struct *work)
rc = smblib_masked_write(chg, TYPE_C_INTRPT_ENB_SOFTWARE_CTRL_REG,
EXIT_SNK_BASED_ON_CC_BIT, 0);
smblib_reg_block_restore(chg, cc2_detach_settings);
- mutex_lock(&chg->lock);
+
+ /*
+ * Mutex acquisition deadlock can happen while cancelling this work
+ * during pd_hard_reset from the function smblib_cc2_sink_removal_exit
+ * which is called in the same lock context that we try to acquire in
+ * this work routine.
+ * Check if this work is running during pd_hard_reset and use trylock
+ * instead of mutex_lock to prevent any deadlock if mutex is already
+ * held.
+ */
+ if (chg->pd_hard_reset) {
+ if (mutex_trylock(&chg->lock))
+ lock = true;
+ } else {
+ mutex_lock(&chg->lock);
+ lock = true;
+ }
+
smblib_usb_typec_change(chg);
- mutex_unlock(&chg->lock);
+
+ if (lock)
+ mutex_unlock(&chg->lock);
return;
rerun:
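
The comment block above describes why rdstd_cc2_detach_work() cannot unconditionally take chg->lock: during a PD hard reset the work may be cancelled from a path that already holds that mutex, so blocking on it would self-deadlock. A minimal standalone sketch of the same pattern, using pthreads purely for illustration (not the driver's API):

    /*
     * Sketch of the trylock pattern used above, expressed with pthreads
     * purely for illustration; chg->pd_hard_reset becomes a plain flag.
     */
    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool hard_reset_in_progress;  /* analogue of chg->pd_hard_reset */

    static void typec_change(void)
    {
        /* work that prefers, but does not require, holding the lock */
    }

    static void detach_work(void)
    {
        bool locked = false;

        if (hard_reset_in_progress) {
            /* The canceller may already hold the lock: never block here. */
            if (pthread_mutex_trylock(&lock) == 0)
                locked = true;
        } else {
            pthread_mutex_lock(&lock);
            locked = true;
        }

        typec_change();

        if (locked)
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        detach_work();                 /* ordinary path takes the lock */
        hard_reset_in_progress = true;
        detach_work();                 /* hard-reset path only trylocks */
        return 0;
    }
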
diff --git a/drivers/soc/qcom/smp2p_sleepstate.c b/drivers/soc/qcom/smp2p_sleepstate.c
index 2ef25e48ce50..1f0809b61220 100644
--- a/drivers/soc/qcom/smp2p_sleepstate.c
+++ b/drivers/soc/qcom/smp2p_sleepstate.c
@@ -20,7 +20,8 @@
#define SET_DELAY (2 * HZ)
#define PROC_AWAKE_ID 12 /* 12th bit */
-static int slst_gpio_base_id;
+int slst_gpio_base_id;
+
/**
* sleepstate_pm_notifier() - PM notifier callback function.
@@ -36,13 +37,11 @@ static int sleepstate_pm_notifier(struct notifier_block *nb,
{
switch (event) {
case PM_SUSPEND_PREPARE:
- gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 0);
msleep(25); /* To be tuned based on SMP2P latencies */
msm_ipc_router_set_ws_allowed(true);
break;
case PM_POST_SUSPEND:
- gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 1);
msleep(25); /* To be tuned based on SMP2P latencies */
msm_ipc_router_set_ws_allowed(false);
break;
diff --git a/drivers/video/fbdev/msm/mdss_mdp_intf_video.c b/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
index 3761fa4af0eb..caa910db508c 100644
--- a/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
+++ b/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
@@ -1236,7 +1236,7 @@ static int mdss_mdp_video_wait4comp(struct mdss_mdp_ctl *ctl, void *arg)
if (rc == 0) {
pr_warn("vsync wait timeout %d, fallback to poll mode\n",
ctl->num);
- ctx->polling_en++;
+ ctx->polling_en = true;
rc = mdss_mdp_video_pollwait(ctl);
} else {
rc = 0;
diff --git a/drivers/video/fbdev/msm/msm_mdss_io_8974.c b/drivers/video/fbdev/msm/msm_mdss_io_8974.c
index 922c4440ba82..000beebe0375 100644
--- a/drivers/video/fbdev/msm/msm_mdss_io_8974.c
+++ b/drivers/video/fbdev/msm/msm_mdss_io_8974.c
@@ -1321,16 +1321,16 @@ static void mdss_dsi_phy_regulator_ctrl(struct mdss_dsi_ctrl_pdata *ctrl,
mdss_dsi_20nm_phy_regulator_enable(ctrl);
break;
default:
- /*
- * For dual dsi case, do not reconfigure dsi phy
- * regulator if the other dsi controller is still
- * active.
- */
- if (!mdss_dsi_is_hw_config_dual(sdata) ||
- (other_ctrl && (!other_ctrl->is_phyreg_enabled
- || other_ctrl->mmss_clamp)))
- mdss_dsi_28nm_phy_regulator_enable(ctrl);
- break;
+ /*
+ * For dual dsi case, do not reconfigure dsi phy
+ * regulator if the other dsi controller is still
+ * active.
+ */
+ if (!mdss_dsi_is_hw_config_dual(sdata) ||
+ (other_ctrl && (!other_ctrl->is_phyreg_enabled
+ || other_ctrl->mmss_clamp)))
+ mdss_dsi_28nm_phy_regulator_enable(ctrl);
+ break;
}
}
ctrl->is_phyreg_enabled = 1;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e16afc80f810..c7402cb76f11 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -233,12 +233,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
goto out_mmput;
}
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
BUG_ON(arg_start > arg_end);
BUG_ON(env_start > env_end);
@@ -990,10 +990,10 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
while (count > 0) {
size_t this_len, max_len;
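
Both hunks above stop taking mmap_sem just to snapshot arg_start/arg_end/env_start/env_end and use the new, much cheaper mm->arg_lock spinlock instead (the field is added in include/linux/mm_types.h and initialized in kernel/fork.c later in this diff). A minimal kernel-side sketch of the reader pattern, assuming only the fields introduced by this patch:

    /* Sketch: snapshot the argv/env boundaries under the new mm->arg_lock. */
    #include <linux/mm_types.h>
    #include <linux/spinlock.h>

    static void snapshot_args(struct mm_struct *mm,
                              unsigned long *arg_start, unsigned long *arg_end)
    {
        spin_lock(&mm->arg_lock);       /* protects arg_start..env_end only */
        *arg_start = mm->arg_start;
        *arg_end   = mm->arg_end;
        spin_unlock(&mm->arg_lock);
    }
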
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9302d016b89f..8e9d08dfbd18 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -123,6 +123,9 @@ struct cpufreq_policy {
unsigned int up_transition_delay_us;
unsigned int down_transition_delay_us;
+ /* Boost switch for tasks with p->in_iowait set */
+ bool iowait_boost_enable;
+
/* Cached frequency lookup from cpufreq_driver_resolve_freq. */
unsigned int cached_target_freq;
int cached_resolved_idx;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 021b1e9ff6cd..8aed56931361 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -208,6 +208,7 @@ extern struct task_group root_task_group;
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.nr_cpus_allowed= NR_CPUS, \
+ .cpus_requested = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
.restart_block = { \
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8da001eb82aa..0e57f41bde84 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -136,6 +136,9 @@ struct irq_domain;
* @node: node index useful for balancing
* @handler_data: per-IRQ data for the irq_chip methods
* @affinity: IRQ affinity on SMP
+ * @effective_affinity: The effective IRQ affinity on SMP as some irq
+ * chips do not allow multi CPU destinations.
+ * A subset of @affinity.
* @msi_desc: MSI descriptor
*/
struct irq_common_data {
@@ -146,6 +149,9 @@ struct irq_common_data {
void *handler_data;
struct msi_desc *msi_desc;
cpumask_var_t affinity;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ cpumask_var_t effective_affinity;
+#endif
};
/**
@@ -690,6 +696,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
return d->common->affinity;
}
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+ return d->common->effective_affinity;
+}
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+ const struct cpumask *m)
+{
+ cpumask_copy(d->common->effective_affinity, m);
+}
+#else
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+ const struct cpumask *m)
+{
+}
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+ return d->common->affinity;
+}
+#endif
+
unsigned int arch_dynirq_lower_bound(unsigned int from);
int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
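
The helpers above are no-ops unless the architecture selects CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK; without it the getter simply falls back to the full affinity mask. A sketch of how an irqchip whose hardware can only target a single CPU might record the effective affinity from its .irq_set_affinity callback (hypothetical chip, not part of this diff):

    /*
     * Sketch only: a hypothetical irqchip that can route an interrupt to a
     * single CPU records that CPU as the effective affinity.
     */
    #include <linux/irq.h>
    #include <linux/cpumask.h>

    static int demo_irq_set_affinity(struct irq_data *d,
                                     const struct cpumask *mask, bool force)
    {
        unsigned int cpu = cpumask_first(mask);  /* hardware takes one target */

        /* ... program the interrupt controller to deliver to 'cpu' ... */

        irq_data_update_effective_affinity(d, cpumask_of(cpu));
        return IRQ_SET_MASK_OK;
    }
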
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 29c17fae9bbf..1019e8d3c88f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -442,6 +442,8 @@ struct mm_struct {
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
unsigned long def_flags;
+
+ spinlock_t arg_lock; /* protect the below fields */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c1f7f9e4fa..4e212132a274 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1668,6 +1668,15 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_WALT
+ struct ravg ravg;
+ /*
+ * 'init_load_pct' represents the initial task load assigned to children
+ * of this task
+ */
+ u32 init_load_pct;
+ u64 last_sleep_ts;
+#endif
#ifdef CONFIG_SCHED_HMP
struct ravg ravg;
/*
@@ -1700,6 +1709,7 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
cpumask_t cpus_allowed;
+ cpumask_t cpus_requested;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -3723,6 +3733,7 @@ static inline unsigned long rlimit_max(unsigned int limit)
#define SCHED_CPUFREQ_DL (1U << 1)
#define SCHED_CPUFREQ_IOWAIT (1U << 2)
#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
+#define SCHED_CPUFREQ_WALT (1U << 4)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1e1fcb8791a7..c85fe9872d07 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,12 @@ extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_cstate_aware;
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int sysctl_sched_walt_init_task_load_pct;
+extern unsigned int sysctl_sched_walt_cpu_high_irqload;
+#endif
#ifdef CONFIG_SCHED_HMP
diff --git a/include/linux/time.h b/include/linux/time.h
index 62cc50700004..cbb55e004342 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -9,6 +9,7 @@
extern struct timezone sys_tz;
#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
+#define TIMER_LOCK_TIGHT_LOOP_DELAY_NS 350
static inline int timespec_equal(const struct timespec *a,
const struct timespec *b)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 73cd7e502d4c..70d6012c89aa 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -1903,6 +1903,7 @@ TRACE_EVENT(walt_update_task_ravg,
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( pid_t, cur_pid )
+ __field(unsigned int, cur_freq )
__field( u64, wallclock )
__field( u64, mark_start )
__field( u64, delta_m )
@@ -1930,6 +1931,7 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->evt = evt;
__entry->cpu = rq->cpu;
__entry->cur_pid = rq->curr->pid;
+ __entry->cur_freq = rq->cur_freq;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->ravg.mark_start;
@@ -1948,10 +1950,11 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->active_windows = p->ravg.active_windows;
),
- TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
+ TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
, __entry->wallclock, __entry->win_start, __entry->delta,
- __entry->evt, __entry->cpu, __entry->cur_pid,
+ __entry->evt, __entry->cpu,
+ __entry->cur_freq, __entry->cur_pid,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand,
__entry->sum, __entry->irqtime,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f3e6608313a2..a0bf3a7ce550 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -852,6 +852,20 @@ void rebuild_sched_domains(void)
put_online_cpus();
}
+static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p,
+ const struct cpumask *new_mask)
+{
+ int ret;
+
+ if (cpumask_subset(&p->cpus_requested, cs->cpus_requested)) {
+ ret = set_cpus_allowed_ptr(p, &p->cpus_requested);
+ if (!ret)
+ return ret;
+ }
+
+ return set_cpus_allowed_ptr(p, new_mask);
+}
+
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
@@ -867,7 +881,7 @@ static void update_tasks_cpumask(struct cpuset *cs)
css_task_iter_start(&cs->css, &it);
while ((task = css_task_iter_next(&it)))
- set_cpus_allowed_ptr(task, cs->effective_cpus);
+ update_cpus_allowed(cs, task, cs->effective_cpus);
css_task_iter_end(&it);
}
@@ -1556,7 +1570,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+ WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
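
update_cpus_allowed() above prefers the task's own cpus_requested mask, recorded by sched_setaffinity() in the kernel/sched/core.c hunk further down, whenever it is still a subset of the destination cpuset, so an affinity chosen by the application survives a move between cpusets. A small userspace sketch that would exercise this behaviour (the cpuset mount point is an assumption):

    /*
     * Sketch: pin the current thread to CPU 2, then check the pinning after
     * the task has been moved into another cpuset (move done externally,
     * e.g. by writing the PID to a hypothetical /dev/cpuset/foo/tasks).
     */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(2, &set);
        sched_setaffinity(0, sizeof(set), &set);  /* records cpus_requested */

        printf("pid %d pinned to cpu2; move it to another cpuset now\n",
               getpid());
        sleep(30);                                /* window for the move */

        sched_getaffinity(0, sizeof(set), &set);
        printf("still pinned to cpu2: %d\n", CPU_ISSET(2, &set));
        return 0;
    }
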
diff --git a/kernel/fork.c b/kernel/fork.c
index a21adc0155b9..c25ebf6dd7f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -621,6 +621,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->pinned_vm = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
+ spin_lock_init(&mm->arg_lock);
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3b48dab80164..5d00ba9af4ec 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW
config GENERIC_IRQ_SHOW_LEVEL
bool
+# Supports effective affinity mask
+config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ bool
+
# Facility to allocate a hardware interrupt. This is legacy support
# and should not be used in new code. Use irq domains instead.
config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 4684b7595e63..9fad2dc50452 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -36,6 +36,10 @@ static bool migrate_one_irq(struct irq_desc *desc)
affinity = &available_cpus;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ const struct cpumask *default_affinity;
+
+ default_affinity = desc->affinity_hint ? : irq_default_affinity;
+
/*
* The order of preference for selecting a fallback CPU is
*
@@ -45,9 +49,9 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
cpumask_andnot(&available_cpus, cpu_online_mask,
cpu_isolated_mask);
- if (cpumask_intersects(&available_cpus, irq_default_affinity))
+ if (cpumask_intersects(&available_cpus, default_affinity))
cpumask_and(&available_cpus, &available_cpus,
- irq_default_affinity);
+ default_affinity);
else if (cpumask_empty(&available_cpus))
affinity = cpu_online_mask;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 52fbf88cd2d8..e0de4682f57a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -43,8 +43,19 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
gfp, node))
return -ENOMEM;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
+ GFP_KERNEL, node)) {
+ free_cpumask_var(desc->irq_common_data.affinity);
+ return -ENOMEM;
+ }
+#endif
+
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
free_cpumask_var(desc->irq_common_data.affinity);
return -ENOMEM;
}
@@ -127,6 +138,9 @@ static void free_masks(struct irq_desc *desc)
free_cpumask_var(desc->pending_mask);
#endif
free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
}
#else
static inline void free_masks(struct irq_desc *desc) { }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index b05509af0352..9da4f2b075d1 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
+enum {
+ AFFINITY,
+ AFFINITY_LIST,
+ EFFECTIVE,
+ EFFECTIVE_LIST,
+};
+
static int show_irq_affinity(int type, struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
- const struct cpumask *mask = desc->irq_common_data.affinity;
+ const struct cpumask *mask;
+ switch (type) {
+ case AFFINITY:
+ case AFFINITY_LIST:
+ mask = desc->irq_common_data.affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (irqd_is_setaffinity_pending(&desc->irq_data))
- mask = desc->pending_mask;
+ if (irqd_is_setaffinity_pending(&desc->irq_data))
+ mask = desc->pending_mask;
#endif
- if (type)
+ break;
+ case EFFECTIVE:
+ case EFFECTIVE_LIST:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ mask = desc->irq_common_data.effective_affinity;
+ break;
+#else
+ return -EINVAL;
+#endif
+ };
+
+ switch (type) {
+ case AFFINITY_LIST:
+ case EFFECTIVE_LIST:
seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
- else
+ break;
+ case AFFINITY:
+ case EFFECTIVE:
seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
+ break;
+ }
return 0;
}
@@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
int no_irq_affinity;
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(0, m, v);
+ return show_irq_affinity(AFFINITY, m, v);
}
static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(1, m, v);
+ return show_irq_affinity(AFFINITY_LIST, m, v);
}
@@ -188,6 +216,44 @@ static const struct file_operations irq_affinity_list_proc_fops = {
.write = irq_affinity_list_proc_write,
};
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static int irq_effective_aff_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE, m, v);
+}
+
+static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE_LIST, m, v);
+}
+
+static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
+}
+
+static int irq_effective_aff_list_proc_open(struct inode *inode,
+ struct file *file)
+{
+ return single_open(file, irq_effective_aff_list_proc_show,
+ PDE_DATA(inode));
+}
+
+static const struct file_operations irq_effective_aff_proc_fops = {
+ .open = irq_effective_aff_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations irq_effective_aff_list_proc_fops = {
+ .open = irq_effective_aff_list_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
@@ -368,6 +434,12 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
proc_create_data("node", 0444, desc->dir,
&irq_node_proc_fops, (void *)(long)irq);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ proc_create_data("effective_affinity", 0444, desc->dir,
+ &irq_effective_aff_proc_fops, (void *)(long)irq);
+ proc_create_data("effective_affinity_list", 0444, desc->dir,
+ &irq_effective_aff_list_proc_fops, (void *)(long)irq);
+# endif
#endif
proc_create_data("spurious", 0444, desc->dir,
@@ -388,6 +460,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
remove_proc_entry("affinity_hint", desc->dir);
remove_proc_entry("smp_affinity_list", desc->dir);
remove_proc_entry("node", desc->dir);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ remove_proc_entry("effective_affinity", desc->dir);
+ remove_proc_entry("effective_affinity_list", desc->dir);
+# endif
#endif
remove_proc_entry("spurious", desc->dir);
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index e6eceb0aa496..3e3ae5ed8100 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -477,8 +477,6 @@ int pm_qos_request_for_cpumask(int pm_qos_class, struct cpumask *mask)
val = c->default_value;
for_each_cpu(cpu, mask) {
- if (cpu_isolated(cpu))
- continue;
switch (c->type) {
case PM_QOS_MIN:
@@ -545,19 +543,29 @@ static void pm_qos_irq_release(struct kref *ref)
}
static void pm_qos_irq_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+ const cpumask_t *unused_mask)
{
unsigned long flags;
struct pm_qos_request *req = container_of(notify,
struct pm_qos_request, irq_notify);
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
+ struct irq_desc *desc = irq_to_desc(req->irq);
+ struct cpumask *new_affinity =
+ irq_data_get_effective_affinity_mask(&desc->irq_data);
+ bool affinity_changed = false;
spin_lock_irqsave(&pm_qos_lock, flags);
- cpumask_copy(&req->cpus_affine, mask);
+ if (!cpumask_equal(&req->cpus_affine, new_affinity)) {
+ cpumask_copy(&req->cpus_affine, new_affinity);
+ affinity_changed = true;
+ }
+
spin_unlock_irqrestore(&pm_qos_lock, flags);
- pm_qos_update_target(c, req, PM_QOS_UPDATE_REQ, req->node.prio);
+ if (affinity_changed)
+ pm_qos_update_target(c, req, PM_QOS_UPDATE_REQ,
+ req->node.prio);
}
#endif
@@ -601,9 +609,17 @@ void pm_qos_add_request(struct pm_qos_request *req,
if (!desc)
return;
- mask = desc->irq_data.common->affinity;
- /* Get the current affinity */
+ /*
+ * If the IRQ is not started, the effective affinity
+ * won't be set. So fallback to the default affinity.
+ */
+ mask = irq_data_get_effective_affinity_mask(
+ &desc->irq_data);
+ if (cpumask_empty(mask))
+ mask = irq_data_get_affinity_mask(
+ &desc->irq_data);
+
cpumask_copy(&req->cpus_affine, mask);
req->irq_notify.irq = req->irq;
req->irq_notify.notify = pm_qos_irq_notify;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6e7832ee6d74..18c322fe8b73 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -34,6 +34,10 @@
#include "power.h"
+#include <linux/gpio.h>
+extern int slst_gpio_base_id;
+#define PROC_AWAKE_ID 12 /* 12th bit */
+
const char *pm_labels[] = { "mem", "standby", "freeze", NULL };
const char *pm_states[PM_SUSPEND_MAX];
@@ -563,7 +567,9 @@ int pm_suspend(suspend_state_t state)
return -EINVAL;
pm_suspend_marker("entry");
+ gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 0);
error = enter_state(state);
+ gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 1);
if (error) {
suspend_stats.fail++;
dpm_save_failed_errno(error);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5f748c5a40f0..90fdf77dab7e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -650,6 +650,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
struct rcu_head *list;
struct rcu_head *next;
LIST_HEAD(rcu_tasks_holdouts);
+ int fract;
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
housekeeping_affine(current);
@@ -731,13 +732,25 @@ static int __noreturn rcu_tasks_kthread(void *arg)
* holdouts. When the list is empty, we are done.
*/
lastreport = jiffies;
- while (!list_empty(&rcu_tasks_holdouts)) {
+
+ /* Start off with HZ/10 wait and slowly back off to 1 HZ wait*/
+ fract = 10;
+
+ for (;;) {
bool firstreport;
bool needreport;
int rtst;
struct task_struct *t1;
- schedule_timeout_interruptible(HZ);
+ if (list_empty(&rcu_tasks_holdouts))
+ break;
+
+ /* Slowly back off waiting for holdouts */
+ schedule_timeout_interruptible(HZ/fract);
+
+ if (fract > 1)
+ fract--;
+
rtst = READ_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 &&
time_after(jiffies, lastreport + rtst);
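
The loop above replaces a fixed one-second sleep with a schedule that starts at HZ/10 and stretches toward a full HZ as fract counts down, so callbacks for quickly-draining holdout lists run sooner. A small sketch of the resulting wait schedule, assuming HZ=100 purely for the arithmetic:

    /*
     * Sketch of the wait schedule above: HZ/10, HZ/9, ... HZ/1, then a
     * steady HZ.  Millisecond values assume HZ=100, which is only an
     * assumption for the arithmetic.
     */
    #include <stdio.h>

    int main(void)
    {
        const int hz = 100;
        int fract = 10;

        for (int pass = 0; pass < 12; pass++) {
            printf("pass %2d: sleep %4d ms\n",
                   pass, (hz / fract) * (1000 / hz));
            if (fract > 1)
                fract--;
        }
        return 0;
    }
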
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7dde1b9918e4..ea301717538f 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,6 +19,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o sched_avg.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
+obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 40a44876c74c..a8d2c50737ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -78,6 +78,7 @@
#include <linux/irq.h>
#include <linux/sched/core_ctl.h>
#include <linux/cpufreq_times.h>
+#include <linux/prefetch.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -97,6 +98,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
+#include "walt.h"
ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
@@ -1084,6 +1086,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+ if (!(p->flags & PF_KTHREAD))
+ return false;
+
+ if (p->nr_cpus_allowed != 1)
+ return false;
+
+ return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ return false;
+
+ if (is_per_cpu_kthread(p))
+ return cpu_online(cpu);
+
+ return cpu_active(cpu);
+}
+
/*
* This is how migration works:
*
@@ -1143,11 +1172,8 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
{
int src_cpu;
- if (unlikely(!cpu_active(dest_cpu)))
- return rq;
-
/* Affinity changed (again). */
- if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+ if (!is_cpu_allowed(p, dest_cpu))
return rq;
src_cpu = cpu_of(rq);
@@ -1364,6 +1390,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
perf_event_task_migrate(p);
+ walt_fixup_busy_time(p, new_cpu);
fixup_busy_time(p, new_cpu);
}
@@ -1648,9 +1675,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso)
for (;;) {
/* Any allowed, online CPU? */
for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
- if (!cpu_online(dest_cpu))
- continue;
- if (!cpu_active(dest_cpu))
+ if (!is_cpu_allowed(p, dest_cpu))
continue;
if (cpu_isolated(dest_cpu)) {
if (allow_iso)
@@ -2132,6 +2157,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
raw_spin_lock(&rq->lock);
old_load = task_load(p);
+ wallclock = walt_ktime_clock();
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -2230,6 +2258,11 @@ static void try_to_wake_up_local(struct task_struct *p)
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
cpufreq_update_util(rq, 0);
+
+ wallclock = walt_ktime_clock();
+
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
note_task_waking(p, wallclock);
}
@@ -2362,6 +2395,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
INIT_LIST_HEAD(&p->se.group_node);
+ walt_init_new_task_load(p);
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
@@ -2646,6 +2680,7 @@ void wake_up_new_task(struct task_struct *p)
struct rq *rq;
add_new_task_to_grp(p);
+ walt_init_new_task_load(p);
raw_spin_lock_irqsave(&p->pi_lock, flags);
p->state = TASK_RUNNING;
@@ -2664,6 +2699,7 @@ void wake_up_new_task(struct task_struct *p)
#endif
rq = __task_rq_lock(p);
mark_task_starting(p);
+ walt_mark_task_starting(p);
update_rq_clock(rq);
post_init_entity_util_avg(&p->se);
activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
@@ -3134,6 +3170,23 @@ EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+ struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+ prefetch(curr);
+ prefetch(&curr->exec_start);
+}
+
+/*
* Return accounted runtime for the task.
* In case the task is currently running, return the runtime plus current's
* pending runtime that have not been accounted yet.
@@ -3167,6 +3220,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* thread, breaking clock_gettime().
*/
if (task_current(rq, p) && task_on_rq_queued(p)) {
+ prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
}
@@ -3194,10 +3248,13 @@ void scheduler_tick(void)
raw_spin_lock(&rq->lock);
old_load = task_load(curr);
+ walt_set_window_start(rq);
set_window_start(rq);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+ walt_ktime_clock(), 0);
calc_global_load_tick(rq);
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
@@ -3561,6 +3618,9 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);
next = pick_next_task(rq, prev);
+ wallclock = walt_ktime_clock();
+ walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+ walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
rq->clock_skip_update = 0;
@@ -4902,6 +4962,9 @@ again:
retval = -EINVAL;
}
+ if (!retval && !(p->flags & PF_KTHREAD))
+ cpumask_and(&p->cpus_requested, in_mask, cpu_possible_mask);
+
out_free_new_mask:
free_cpumask_var(new_mask);
out_free_cpus_allowed:
@@ -5923,12 +5986,6 @@ int sched_isolate_cpu(int cpu)
cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
- /* We cannot isolate ALL cpus in the system */
- if (cpumask_weight(&avail_cpus) == 1) {
- ret_code = -EINVAL;
- goto out;
- }
-
if (!cpu_online(cpu)) {
ret_code = -EINVAL;
goto out;
@@ -5937,6 +5994,13 @@ int sched_isolate_cpu(int cpu)
if (++cpu_isolation_vote[cpu] > 1)
goto out;
+ /* We cannot isolate ALL cpus in the system */
+ if (cpumask_weight(&avail_cpus) == 1) {
+ --cpu_isolation_vote[cpu];
+ ret_code = -EINVAL;
+ goto out;
+ }
+
/*
* There is a race between watchdog being enabled by hotplug and
* core isolation disabling the watchdog. When a CPU is hotplugged in
@@ -5960,7 +6024,9 @@ int sched_isolate_cpu(int cpu)
smp_call_function_any(&avail_cpus, hrtimer_quiesce_cpu, &cpu, 1);
smp_call_function_any(&avail_cpus, timer_quiesce_cpu, &cpu, 1);
+ irq_lock_sparse();
stop_cpus(cpumask_of(cpu), do_isolation_work_cpu_stop, 0);
+ irq_unlock_sparse();
calc_load_migrate(rq);
update_max_interval();
@@ -6319,6 +6385,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_UP_PREPARE:
raw_spin_lock_irqsave(&rq->lock, flags);
+ walt_set_window_start(rq);
set_window_start(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
@@ -6340,6 +6407,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
sched_ttwu_pending();
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
+ walt_migrate_sync_cpu(cpu);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -8325,6 +8393,7 @@ void __init sched_init_smp(void)
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
BUG();
+ cpumask_copy(&current->cpus_requested, cpu_possible_mask);
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);
@@ -8534,6 +8603,11 @@ void __init sched_init(void)
}
#endif
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_WALT
+ rq->cur_irqload = 0;
+ rq->avg_irqload = 0;
+ rq->irqload_ts = 0;
+#endif
INIT_LIST_HEAD(&rq->cfs_tasks);
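
Among the core.c changes above, prefetch_curr_exec_start() exists purely because task_sched_runtime() was observed to stall on cache misses when touching the current entity's exec_start; issuing the prefetch before the data is needed hides that latency. Outside the scheduler the same idea is just __builtin_prefetch() on data you are about to read; a minimal sketch:

    /* Sketch: prefetch the next list node while still working on this one. */
    #include <stddef.h>

    struct node {
        struct node *next;
        long payload[16];
    };

    static long walk(struct node *n)
    {
        long sum = 0;

        while (n) {
            if (n->next)
                __builtin_prefetch(n->next);  /* start the fetch early */
            sum += n->payload[0];
            n = n->next;
        }
        return sum;
    }

    int main(void)
    {
        struct node b = { NULL, { 2 } }, a = { &b, { 1 } };

        return (int)walk(&a);  /* 3 */
    }
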
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 6c84b4d28914..6effb44aeb30 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -33,6 +33,7 @@ struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int up_rate_limit_us;
unsigned int down_rate_limit_us;
+ bool iowait_boost_enable;
};
struct sugov_policy {
@@ -228,6 +229,11 @@ static void sugov_get_util(unsigned long *util, unsigned long *max, u64 time)
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
unsigned int flags)
{
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+
+ if (!sg_policy->tunables->iowait_boost_enable)
+ return;
+
if (flags & SCHED_CPUFREQ_IOWAIT) {
if (sg_cpu->iowait_boost_pending)
return;
@@ -510,12 +516,36 @@ static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
return count;
}
+static ssize_t iowait_boost_enable_show(struct gov_attr_set *attr_set,
+ char *buf)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+ return sprintf(buf, "%u\n", tunables->iowait_boost_enable);
+}
+
+static ssize_t iowait_boost_enable_store(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+ bool enable;
+
+ if (kstrtobool(buf, &enable))
+ return -EINVAL;
+
+ tunables->iowait_boost_enable = enable;
+
+ return count;
+}
+
static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);
+static struct governor_attr iowait_boost_enable = __ATTR_RW(iowait_boost_enable);
static struct attribute *sugov_attributes[] = {
&up_rate_limit_us.attr,
&down_rate_limit_us.attr,
+ &iowait_boost_enable.attr,
NULL
};
@@ -675,6 +705,8 @@ static int sugov_init(struct cpufreq_policy *policy)
}
}
+ tunables->iowait_boost_enable = policy->iowait_boost_enable;
+
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
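
The iowait_boost_enable tunable above is attached to the governor's attr_set, so once schedutil manages a policy it appears as a per-policy sysfs file; the policy0 path used below is an assumption, not something this diff establishes. Sketch of turning the boost off from userspace:

    /* Sketch: disable schedutil's iowait boost on policy0 (path assumed). */
    #include <stdio.h>

    int main(void)
    {
        const char *p =
            "/sys/devices/system/cpu/cpufreq/policy0/schedutil/iowait_boost_enable";
        FILE *f = fopen(p, "w");

        if (!f)
            return 1;  /* schedutil not governing policy0, or no such tunable */
        fputs("0\n", f);
        return fclose(f);
    }
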
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 14225d5d8617..867cb7877511 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -133,6 +133,8 @@ retry:
if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_andnot(lowest_mask, lowest_mask,
+ cpu_isolated_mask);
if (drop_nopreempts)
drop_nopreempt_cpus(lowest_mask);
/*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e6ec68c15aa3..cf6729cb46dd 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -6,6 +6,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq_times.h>
#include "sched.h"
+#include "walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -79,9 +80,10 @@ void irqtime_account_irq(struct task_struct *curr)
irq_time_write_end();
- if (account)
+ if (account) {
+ walt_account_irqtime(cpu, curr, delta, wallclock);
sched_account_irqtime(cpu, curr, delta, wallclock);
- else if (curr != this_cpu_ksoftirqd())
+ } else if (curr != this_cpu_ksoftirqd())
sched_account_irqstart(cpu, curr, wallclock);
local_irq_restore(flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51443a801af5..266fc95f6c0f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -55,6 +55,12 @@ unsigned int normalized_sysctl_sched_latency = 6000000ULL;
unsigned int sysctl_sched_sync_hint_enable = 1;
unsigned int sysctl_sched_cstate_aware = 1;
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+ (10 * NSEC_PER_MSEC);
+#endif
/*
* The initial- and re-scaling of tunables is configurable
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -4326,6 +4332,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
*/
#define UPDATE_TG 0x1
#define SKIP_AGE_LOAD 0x2
+#define SKIP_CPUFREQ 0x4
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct sched_entity *se, int flags)
@@ -4346,7 +4353,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
cfs_rq->curr == se, NULL);
}
- decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ));
decayed |= propagate_entity_load_avg(se);
if (decayed && (flags & UPDATE_TG))
@@ -4522,6 +4529,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
#define UPDATE_TG 0x0
#define SKIP_AGE_LOAD 0x0
+#define SKIP_CPUFREQ 0x0
static inline void update_load_avg(struct sched_entity *se, int not_used1){}
static inline void
@@ -4744,6 +4752,8 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+ int update_flags;
+
/*
* Update run-time statistics of the 'current'.
*/
@@ -4757,7 +4767,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
- update_load_avg(se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(se, update_flags);
dequeue_entity_load_avg(cfs_rq, se);
update_stats_dequeue(cfs_rq, se);
@@ -5052,7 +5067,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
now = sched_clock_cpu(smp_processor_id());
cfs_b->runtime = cfs_b->quota;
- cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -5074,7 +5088,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
- u64 amount = 0, min_amount, expires;
+ u64 amount = 0, min_amount;
/* note: this is a positive sum as runtime_remaining <= 0 */
min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -5091,61 +5105,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
cfs_b->idle = 0;
}
}
- expires = cfs_b->runtime_expires;
raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += amount;
- /*
- * we may have advanced our local expiration to account for allowed
- * spread between our sched_clock and the one on which runtime was
- * issued.
- */
- if ((s64)(expires - cfs_rq->runtime_expires) > 0)
- cfs_rq->runtime_expires = expires;
return cfs_rq->runtime_remaining > 0;
}
-/*
- * Note: This depends on the synchronization provided by sched_clock and the
- * fact that rq->clock snapshots this value.
- */
-static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
-{
- struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
- /* if the deadline is ahead of our clock, nothing to do */
- if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0))
- return;
-
- if (cfs_rq->runtime_remaining < 0)
- return;
-
- /*
- * If the local deadline has passed we have to consider the
- * possibility that our sched_clock is 'fast' and the global deadline
- * has not truly expired.
- *
- * Fortunately we can check determine whether this the case by checking
- * whether the global deadline has advanced. It is valid to compare
- * cfs_b->runtime_expires without any locks since we only care about
- * exact equality, so a partial write will still work.
- */
-
- if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
- /* extend local deadline, drift is bounded above by 2 ticks */
- cfs_rq->runtime_expires += TICK_NSEC;
- } else {
- /* global deadline is ahead, expiration has passed */
- cfs_rq->runtime_remaining = 0;
- }
-}
-
static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
{
/* dock delta_exec before expiring quota (as it could span periods) */
cfs_rq->runtime_remaining -= delta_exec;
- expire_cfs_rq_runtime(cfs_rq);
if (likely(cfs_rq->runtime_remaining > 0))
return;
@@ -5379,8 +5349,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
cpu_temp(cpu_of(rq)));
}
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
- u64 remaining, u64 expires)
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
{
struct cfs_rq *cfs_rq;
u64 runtime;
@@ -5401,7 +5370,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
remaining -= runtime;
cfs_rq->runtime_remaining += runtime;
- cfs_rq->runtime_expires = expires;
/* we check whether we're throttled above */
if (cfs_rq->runtime_remaining > 0)
@@ -5426,7 +5394,7 @@ next:
*/
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
{
- u64 runtime, runtime_expires;
+ u64 runtime;
int throttled;
/* no need to continue the timer with no bandwidth constraint */
@@ -5454,8 +5422,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
/* account preceding periods in which throttling occurred */
cfs_b->nr_throttled += overrun;
- runtime_expires = cfs_b->runtime_expires;
-
/*
* This check is repeated as we are holding onto the new bandwidth while
* we unthrottle. This can potentially race with an unthrottled group
@@ -5468,8 +5434,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
cfs_b->distribute_running = 1;
raw_spin_unlock(&cfs_b->lock);
/* we can't nest cfs_b->lock while distributing bandwidth */
- runtime = distribute_cfs_runtime(cfs_b, runtime,
- runtime_expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
cfs_b->distribute_running = 0;
@@ -5546,8 +5511,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
return;
raw_spin_lock(&cfs_b->lock);
- if (cfs_b->quota != RUNTIME_INF &&
- cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+ if (cfs_b->quota != RUNTIME_INF) {
cfs_b->runtime += slack_runtime;
/* we are under rq->lock, defer unthrottling using a timer */
@@ -5579,7 +5543,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
{
u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
- u64 expires;
/* confirm we're still not at a refresh boundary */
raw_spin_lock(&cfs_b->lock);
@@ -5596,7 +5559,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
runtime = cfs_b->runtime;
- expires = cfs_b->runtime_expires;
if (runtime)
cfs_b->distribute_running = 1;
@@ -5605,11 +5567,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (!runtime)
return;
- runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
- if (expires == cfs_b->runtime_expires)
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ cfs_b->runtime -= min(runtime, cfs_b->runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -5936,6 +5897,25 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
#ifdef CONFIG_SMP
int task_new = flags & ENQUEUE_WAKEUP_NEW;
+
+ /*
+ * Update SchedTune accounting.
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ *
+ * We do it also in the case where we enqueue a throttled task;
+ * we could argue that a throttled task should not boost a CPU,
+ * however:
+ * a) properly implementing CPU boosting considering throttled
+ * tasks will increase a lot the complexity of the solution
+ * b) it's not easy to quantify the benefits introduced by
+ * such a more complex solution.
+ * Thus, for the time being we go for the simple solution and boost
+ * also for throttled RQs.
+ */
+ schedtune_enqueue_task(p, cpu_of(rq));
#endif
/*
@@ -5961,6 +5941,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running++;
+ walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
flags = ENQUEUE_WAKEUP;
@@ -5969,6 +5950,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
+ walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
@@ -5984,27 +5966,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
-
- /*
- * Update SchedTune accounting.
- *
- * We do it before updating the CPU capacity to ensure the
- * boost value of the current task is accounted for in the
- * selection of the OPP.
- *
- * We do it also in the case where we enqueue a throttled task;
- * we could argue that a throttled task should not boost a CPU,
- * however:
- * a) properly implementing CPU boosting considering throttled
- * tasks will increase a lot the complexity of the solution
- * b) it's not easy to quantify the benefits introduced by
- * such a more complex solution.
- * Thus, for the time being we go for the simple solution and boost
- * also for throttled RQs.
- */
- schedtune_enqueue_task(p, cpu_of(rq));
-
if (energy_aware() && !se) {
+ walt_inc_cumulative_runnable_avg(rq, p);
if (!task_new && !rq->rd->overutilized &&
cpu_overutilized(rq->cpu)) {
rq->rd->overutilized = true;
@@ -6029,6 +5992,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
int task_sleep = flags & DEQUEUE_SLEEP;
+ if (task_sleep && rq->nr_running == 1)
+ flags |= DEQUEUE_IDLE;
+
+#ifdef CONFIG_SMP
+ /*
+ * Update SchedTune accounting
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ */
+ schedtune_dequeue_task(p, cpu_of(rq));
+#endif
+
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
@@ -6042,6 +6019,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running--;
+ walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
/* Don't dequeue parent if it has other entities besides us */
@@ -6060,14 +6038,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
for_each_sched_entity(se) {
+ int update_flags;
+
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
+ walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
break;
- update_load_avg(se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(se, update_flags);
update_cfs_shares(se);
}
@@ -6076,19 +6062,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
dec_rq_hmp_stats(rq, p, 1);
}
-#ifdef CONFIG_SMP
-
- /*
- * Update SchedTune accounting
- *
- * We do it before updating the CPU capacity to ensure the
- * boost value of the current task is accounted for in the
- * selection of the OPP.
- */
- schedtune_dequeue_task(p, cpu_of(rq));
-
-#endif /* CONFIG_SMP */
-
hrtick_update(rq);
}
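
As a rough illustration of the DEQUEUE_IDLE/SKIP_CPUFREQ plumbing added in the fair.c hunks above, the standalone sketch below (not kernel code; the helper name and printout are invented for illustration) shows how a sleep-dequeue of the last runnable task ends up suppressing the cpufreq update:

#include <stdbool.h>
#include <stdio.h>

/* Flag values mirror the defines introduced in this patch. */
#define DEQUEUE_SLEEP	0x01
#define DEQUEUE_IDLE	0x80	/* the last dequeue before IDLE */
#define UPDATE_TG	0x1
#define SKIP_CPUFREQ	0x4

/* Stands in for update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ)). */
static bool freq_update_requested(int update_flags)
{
	return !(update_flags & SKIP_CPUFREQ);
}

int main(void)
{
	int dequeue_flags = DEQUEUE_SLEEP;	/* task is going to sleep */
	int rq_nr_running = 1;			/* ...and it is the last one */
	int update_flags = UPDATE_TG;

	if ((dequeue_flags & DEQUEUE_SLEEP) && rq_nr_running == 1)
		dequeue_flags |= DEQUEUE_IDLE;
	if (dequeue_flags & DEQUEUE_IDLE)
		update_flags |= SKIP_CPUFREQ;

	printf("cpufreq update requested: %s\n",
	       freq_update_requested(update_flags) ? "yes" : "no");
	return 0;
}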
@@ -7098,6 +7071,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
static inline unsigned long task_util(struct task_struct *p)
{
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+ unsigned long demand = p->ravg.demand;
+ return (demand << 10) / walt_ravg_window;
+ }
+#endif
return p->se.avg.util_avg;
}
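
To make the scaling in the WALT task_util() branch above concrete, here is a small standalone sketch (illustrative numbers only, not kernel code): demand is nanoseconds of work within the WALT window, mapped onto the kernel's 0..1024 capacity scale by the "<< 10 / window" step.

#include <stdio.h>

int main(void)
{
	unsigned long long walt_ravg_window = 20000000ULL;	/* 20 ms window */
	unsigned long long demand = 10000000ULL;		/* ~10 ms of demand */
	unsigned long long util = (demand << 10) / walt_ravg_window;

	/* prints 512: the task looks like half of one CPU's capacity */
	printf("task_util = %llu of 1024\n", util);
	return 0;
}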
@@ -7656,6 +7635,11 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
if (new_util > capacity_orig)
continue;
+#ifdef CONFIG_SCHED_WALT
+ if (walt_cpu_high_irqload(i))
+ continue;
+#endif
+
/*
* Case A) Latency sensitive tasks
*
@@ -8953,7 +8937,17 @@ redo:
if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
goto next;
- if ((load / 2) > env->imbalance)
+	/*
+	 * p is not the running task by the time we get here, so if p is
+	 * one of only two tasks on the src CPU's runqueue and is not the
+	 * running one, it is the only task that can be balanced.
+	 * Therefore skip tasks whose load exceeds 2*imbalance only when
+	 * other tasks could be balanced instead, or when big tasks are
+	 * being ignored.
+	 */
+ if (((cpu_rq(env->src_cpu)->nr_running > 2) ||
+ (env->flags & LBF_IGNORE_BIG_TASKS)) &&
+ ((load / 2) > env->imbalance))
goto next;
detach_task(p, env);
@@ -10446,8 +10440,10 @@ static int need_active_balance(struct lb_env *env)
* It's worth migrating the task if the src_cpu's capacity is reduced
* because of other sched_class or IRQs if more capacity stays
* available on dst_cpu.
+ * Avoid pulling the CFS task if it is the only task running.
*/
if ((env->idle != CPU_NOT_IDLE) &&
+ (env->src_rq->nr_running > 1) &&
(env->src_rq->cfs.h_nr_running == 1)) {
if ((check_cpu_capacity(env->src_rq, sd)) &&
(capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
@@ -10685,7 +10681,24 @@ more_balance:
/* All tasks on this runqueue were pinned by CPU affinity */
if (unlikely(env.flags & LBF_ALL_PINNED)) {
cpumask_clear_cpu(cpu_of(busiest), cpus);
- if (!cpumask_empty(cpus)) {
+ /*
+ * dst_cpu is not a valid busiest cpu in the following
+ * check since load cannot be pulled from dst_cpu to be
+ * put on dst_cpu.
+ */
+ cpumask_clear_cpu(env.dst_cpu, cpus);
+ /*
+ * Go back to "redo" iff the load-balance cpumask
+ * contains other potential busiest cpus for the
+ * current sched domain.
+ */
+ if (cpumask_intersects(cpus, sched_domain_span(env.sd))) {
+ /*
+ * Now that the check has passed, reenable
+ * dst_cpu so that load can be calculated on
+ * it in the redo path.
+ */
+ cpumask_set_cpu(env.dst_cpu, cpus);
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
goto redo;
@@ -11772,7 +11785,8 @@ static inline bool vruntime_normalized(struct task_struct *p)
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_class == &fair_sched_class))
return true;
return false;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 598656b42203..649d6a437a13 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -764,13 +764,16 @@ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
unsigned int
min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
-/* Min window size (in ns) = 10ms */
-#define MIN_SCHED_RAVG_WINDOW 10000000
+/* Min window size (in ns) = 20ms */
+#define MIN_SCHED_RAVG_WINDOW ((20000000 / TICK_NSEC) * TICK_NSEC)
/* Max window size (in ns) = 1s */
-#define MAX_SCHED_RAVG_WINDOW 1000000000
+#define MAX_SCHED_RAVG_WINDOW ((1000000000 / TICK_NSEC) * TICK_NSEC)
-/* Window size (in ns) */
+/*
+ * Window size (in ns). Adjust for the tick size so that the window
+ * rollover occurs just before the tick boundary.
+ */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
/* Maximum allowed threshold before freq aggregation must be enabled */
@@ -1616,17 +1619,20 @@ static inline int exiting_task(struct task_struct *p)
static int __init set_sched_ravg_window(char *str)
{
+ unsigned int adj_window;
unsigned int window_size;
get_option(&str, &window_size);
- if (window_size < MIN_SCHED_RAVG_WINDOW ||
- window_size > MAX_SCHED_RAVG_WINDOW) {
- WARN_ON(1);
- return -EINVAL;
- }
+ /* Adjust for CONFIG_HZ */
+ adj_window = (window_size / TICK_NSEC) * TICK_NSEC;
+
+ /* Warn if we're a bit too far away from the expected window size */
+ WARN(adj_window < window_size - NSEC_PER_MSEC,
+ "tick-adjusted window size %u, original was %u\n", adj_window,
+ window_size);
- sched_ravg_window = window_size;
+ sched_ravg_window = adj_window;
return 0;
}
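
A standalone sketch of the tick alignment performed by the new MIN_SCHED_RAVG_WINDOW definition and set_sched_ravg_window() above; the TICK_NSEC value below is an assumption corresponding to HZ=300 and is used purely for illustration.

#include <stdio.h>

#define NSEC_PER_MSEC	1000000U
#define TICK_NSEC	3333333U	/* assumed: roughly 1e9 / 300 for HZ=300 */

int main(void)
{
	unsigned int window_size = 20000000U;	/* requested 20 ms window */
	unsigned int adj_window = (window_size / TICK_NSEC) * TICK_NSEC;

	/* six whole ticks fit, so the window becomes 19999998 ns */
	printf("tick-adjusted window: %u ns\n", adj_window);

	/* the WARN in the patch only fires if rounding cost more than 1 ms */
	if (adj_window < window_size - NSEC_PER_MSEC)
		printf("would warn: adjusted window lost more than 1 ms\n");
	return 0;
}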
@@ -3657,8 +3663,10 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
migrate_top_tasks(p, src_rq, dest_rq);
if (!same_freq_domain(new_cpu, task_cpu(p))) {
- cpufreq_update_util(dest_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG);
- cpufreq_update_util(src_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG);
+ cpufreq_update_util(dest_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG |
+ SCHED_CPUFREQ_WALT);
+ cpufreq_update_util(src_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG |
+ SCHED_CPUFREQ_WALT);
}
if (p == src_rq->ed_task) {
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 391ec29c71c0..ac81704e14d9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -12,6 +12,7 @@
#include <linux/hrtimer.h>
#include "tune.h"
+#include "walt.h"
int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -1445,10 +1446,15 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+#ifdef CONFIG_SMP
+ schedtune_enqueue_task(p, cpu_of(rq));
+#endif
+
if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
enqueue_rt_entity(rt_se, flags);
+ walt_inc_cumulative_runnable_avg(rq, p);
inc_hmp_sched_stats_rt(rq, p);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1486,8 +1492,13 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+#ifdef CONFIG_SMP
+ schedtune_dequeue_task(p, cpu_of(rq));
+#endif
+
update_curr_rt(rq);
dequeue_rt_entity(rt_se, flags);
+ walt_dec_cumulative_runnable_avg(rq, p);
dec_hmp_sched_stats_rt(rq, p);
dequeue_pushable_task(rq, p);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 90cc450dff7e..78ba150f2016 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -226,9 +226,8 @@ struct cfs_bandwidth {
ktime_t period;
u64 quota, runtime;
s64 hierarchical_quota;
- u64 runtime_expires;
- int idle, period_active;
+ short idle, period_active;
struct hrtimer period_timer, slack_timer;
struct list_head throttled_cfs_rq;
@@ -511,6 +510,10 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
+#ifdef CONFIG_SCHED_WALT
+ u64 cumulative_runnable_avg;
+#endif
+
#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_SCHED_HMP
@@ -518,7 +521,6 @@ struct cfs_rq {
#endif
int runtime_enabled;
- u64 runtime_expires;
s64 runtime_remaining;
u64 throttled_clock, throttled_clock_task;
@@ -819,6 +821,7 @@ struct rq {
#endif
#ifdef CONFIG_SCHED_WALT
+ unsigned int cur_freq;
u64 cumulative_runnable_avg;
u64 window_start;
u64 curr_runnable_sum;
@@ -2027,6 +2030,7 @@ static const u32 prio_to_wmult[40] = {
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
+#define DEQUEUE_IDLE 0x80 /* The last dequeue before IDLE */
#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02
@@ -2856,8 +2860,10 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
#ifdef CONFIG_SCHED_HMP
/*
* Skip if we've already reported, but not if this is an inter-cluster
- * migration
+	 * migration. Also, only allow updates from WALT call sites.
*/
+ if (!(flags & SCHED_CPUFREQ_WALT))
+ return;
if (!sched_disable_window_stats &&
(rq->load_reported_window == rq->window_start) &&
!(flags & SCHED_CPUFREQ_INTERCLUSTER_MIG))
@@ -2865,7 +2871,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
rq->load_reported_window = rq->window_start;
#endif
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
+ cpu_of(rq)));
if (data)
data->func(data, rq_clock(rq), flags);
}
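
The per_cpu_ptr() change above matters when cpufreq_update_util() runs on behalf of a remote runqueue; this userspace sketch (an array stands in for per-cpu data, and every name here is invented for illustration) shows why indexing by cpu_of(rq) rather than by the executing CPU picks the right hook:

#include <stdio.h>

#define NR_CPUS 4

/* Stand-in for the per-cpu cpufreq_update_util_data pointers. */
static const char *update_hook[NR_CPUS] = { "hook-cpu0", "hook-cpu1",
					    "hook-cpu2", "hook-cpu3" };

int main(void)
{
	int this_cpu = 0;	/* CPU executing the update path */
	int rq_cpu = 2;		/* CPU whose runqueue load is being reported */

	/* old behaviour: this_cpu_ptr() always indexes the executing CPU */
	printf("this_cpu_ptr -> %s (wrong target for a remote rq)\n",
	       update_hook[this_cpu]);
	/* new behaviour: per_cpu_ptr(..., cpu_of(rq)) follows the runqueue */
	printf("per_cpu_ptr  -> %s (matches the reported rq)\n",
	       update_hook[rq_cpu]);
	return 0;
}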
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 3278c81cefb1..0fa11d86599e 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,4 +1,5 @@
#include "sched.h"
+#include "walt.h"
/*
* stop-task scheduling class.
@@ -78,6 +79,7 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
add_nr_running(rq, 1);
+ walt_inc_cumulative_runnable_avg(rq, p);
inc_hmp_sched_stats_stop(rq, p);
}
@@ -85,6 +87,7 @@ static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
sub_nr_running(rq, 1);
+ walt_dec_cumulative_runnable_avg(rq, p);
dec_hmp_sched_stats_stop(rq, p);
}
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index b84d13750604..d0ef97f484b1 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -240,7 +240,7 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta,
* implementation especially for the computation of the per-CPU boost
* value
*/
-#define BOOSTGROUPS_COUNT 5
+#define BOOSTGROUPS_COUNT 6
/* Array of configured boostgroups */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
@@ -770,6 +770,7 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
static void schedtune_attach(struct cgroup_taskset *tset)
{
+#ifdef CONFIG_SCHED_HMP
struct task_struct *task;
struct cgroup_subsys_state *css;
struct schedtune *st;
@@ -782,6 +783,7 @@ static void schedtune_attach(struct cgroup_taskset *tset)
cgroup_taskset_for_each(task, css, tset)
sync_cgroup_colocation(task, colocate);
+#endif
}
static struct cftype files[] = {
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 8d25ffbe4fed..911606537808 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -57,11 +57,6 @@ static unsigned int sync_cpu;
static ktime_t ktime_last;
static bool walt_ktime_suspended;
-static unsigned int task_load(struct task_struct *p)
-{
- return p->ravg.demand;
-}
-
static inline void fixup_cum_window_demand(struct rq *rq, s64 delta)
{
rq->cum_window_demand += delta;
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index de7edac43674..34c72a0fcf39 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -54,6 +54,8 @@ static inline void walt_set_window_start(struct rq *rq) { }
static inline void walt_migrate_sync_cpu(int cpu) { }
static inline void walt_init_cpu_efficiency(void) { }
static inline u64 walt_ktime_clock(void) { return 0; }
+static inline void walt_account_irqtime(int cpu, struct task_struct *curr,
+ u64 delta, u64 wallclock) { }
#define walt_cpu_high_irqload(cpu) false
diff --git a/kernel/sys.c b/kernel/sys.c
index d5ea3360038c..25cf2aa72d3b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1854,7 +1854,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
return error;
}
- down_write(&mm->mmap_sem);
+ /*
+	 * arg_lock protects concurrent updates, but we still need mmap_sem for
+ * read to exclude races with sys_brk.
+ */
+ down_read(&mm->mmap_sem);
/*
* We don't validate if these members are pointing to
@@ -1868,6 +1872,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
* to any problem in kernel itself
*/
+ spin_lock(&mm->arg_lock);
mm->start_code = prctl_map.start_code;
mm->end_code = prctl_map.end_code;
mm->start_data = prctl_map.start_data;
@@ -1879,6 +1884,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
+ spin_unlock(&mm->arg_lock);
/*
* Note this update of @saved_auxv is lockless thus
@@ -1891,7 +1897,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
if (prctl_map.auxv_size)
memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
- up_write(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
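
A rough userspace analogue of the locking change in prctl_set_mm_map() above, using pthread primitives as stand-ins (a mutex plays the role of the kernel spinlock; all names here are invented for illustration): the wide write lock becomes a read lock, and the short field update is serialised by the dedicated arg_lock.

#include <pthread.h>
#include <stdio.h>

struct mm_like {
	pthread_rwlock_t mmap_sem;	/* stand-in for mm->mmap_sem */
	pthread_mutex_t arg_lock;	/* stand-in for mm->arg_lock */
	unsigned long arg_start, arg_end;
};

static void set_arg_range(struct mm_like *mm, unsigned long start,
			  unsigned long end)
{
	pthread_rwlock_rdlock(&mm->mmap_sem);	/* read side is enough here */
	pthread_mutex_lock(&mm->arg_lock);	/* short critical section */
	mm->arg_start = start;
	mm->arg_end = end;
	pthread_mutex_unlock(&mm->arg_lock);
	pthread_rwlock_unlock(&mm->mmap_sem);
}

int main(void)
{
	struct mm_like mm = {
		.mmap_sem = PTHREAD_RWLOCK_INITIALIZER,
		.arg_lock = PTHREAD_MUTEX_INITIALIZER,
	};

	set_arg_range(&mm, 0x1000, 0x2000);
	printf("arg range: %#lx-%#lx\n", mm.arg_start, mm.arg_end);
	return 0;
}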
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e2aab9cf058b..8980bdffde3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -575,6 +575,36 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_SCHED_WALT
+ {
+ .procname = "sched_use_walt_cpu_util",
+ .data = &sysctl_sched_use_walt_cpu_util,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_use_walt_task_util",
+ .data = &sysctl_sched_use_walt_task_util,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_walt_init_task_load_pct",
+ .data = &sysctl_sched_walt_init_task_load_pct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_walt_cpu_high_irqload",
+ .data = &sysctl_sched_walt_cpu_high_irqload,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{
.procname = "sched_cstate_aware",
.data = &sysctl_sched_cstate_aware,
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 4171fee2d4ec..612c97156df7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -25,6 +25,7 @@
#include <linux/posix-timers.h>
#include <linux/workqueue.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
#ifdef CONFIG_MSM_PM
#include "lpm-levels.h"
@@ -503,6 +504,7 @@ int alarm_cancel(struct alarm *alarm)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 79fadcad21ff..6bd4247198e2 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,6 +49,7 @@
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
#include <asm/uaccess.h>
@@ -156,6 +157,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
}
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
@@ -1061,6 +1063,7 @@ int hrtimer_cancel(struct hrtimer *timer)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 67646a316436..2d8b82d90c9f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -798,6 +798,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
spin_unlock_irqrestore(&base->lock, *flags);
}
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
@@ -1148,6 +1149,7 @@ int del_timer_sync(struct timer_list *timer)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL(del_timer_sync);
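
The timer hunks above all add the same back-off to busy retry loops; the standalone sketch below mimics the pattern with pthreads (the 1000 ns figure is an assumption standing in for TIMER_LOCK_TIGHT_LOOP_DELAY_NS, whose real definition is outside these hunks):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define TIGHT_LOOP_DELAY_NS 1000L	/* assumed stand-in value */

static pthread_mutex_t timer_lock = PTHREAD_MUTEX_INITIALIZER;

static void cancel_with_backoff(void)
{
	struct timespec delay = { .tv_sec = 0, .tv_nsec = TIGHT_LOOP_DELAY_NS };

	for (;;) {
		if (pthread_mutex_trylock(&timer_lock) == 0) {
			/* lock acquired: do the cancel work, then stop */
			pthread_mutex_unlock(&timer_lock);
			return;
		}
		/* contended: pause briefly instead of hammering the lock */
		nanosleep(&delay, NULL);
	}
}

int main(void)
{
	cancel_with_backoff();
	printf("cancelled\n");
	return 0;
}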
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 975e49f00f34..02a6962d5b0b 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -21,6 +21,7 @@ struct mm_struct init_mm = {
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+ .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
diff --git a/mm/ksm.c b/mm/ksm.c
index 2a4ef426b331..98920d1d59ed 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -580,8 +580,9 @@ again:
* case this node is no longer referenced, and should be freed;
* however, it might mean that the page is under page_freeze_refs().
* The __remove_mapping() case is easy, again the node is now stale;
- * but if page is swapcache in migrate_page_move_mapping(), it might
- * still be our page, in which case it's essential to keep the node.
+	 * the same applies in the reuse_ksm_page() case; but if page is
+	 * swapcache in migrate_page_move_mapping(), it might still be our
+	 * page, in which case it's essential to keep the node.
*/
while (!get_page_unless_zero(page)) {
/*
diff --git a/mm/memory.c b/mm/memory.c
index 09a57fe6ae01..0c69908d3eed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,39 +2399,31 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* Take out anonymous pages first, anonymous shared vmas are
* not dirty accountable.
*/
- if (PageAnon(old_page) && !PageKsm(old_page)) {
- if (!trylock_page(old_page)) {
- page_cache_get(old_page);
- pte_unmap_unlock(page_table, ptl);
- lock_page(old_page);
- page_table = pte_offset_map_lock(mm, pmd, address,
- &ptl);
- if (!pte_same(*page_table, orig_pte)) {
- unlock_page(old_page);
- pte_unmap_unlock(page_table, ptl);
- page_cache_release(old_page);
- return 0;
- }
- page_cache_release(old_page);
- }
- if (reuse_swap_page(old_page)) {
- /*
- * The page is all ours. Move it to our anon_vma so
- * the rmap code will not search our parent or siblings.
- * Protected against the rmap code by the page lock.
- */
- page_move_anon_rmap(old_page, vma, address);
+ if (PageAnon(old_page)) {
+ /* PageKsm() doesn't necessarily raise the page refcount */
+ if (PageKsm(old_page) || page_count(old_page) != 1)
+ goto copy;
+ if (!trylock_page(old_page))
+ goto copy;
+ if (PageKsm(old_page) || page_mapcount(old_page) != 1 || page_count(old_page) != 1) {
unlock_page(old_page);
- return wp_page_reuse(mm, vma, address, page_table, ptl,
- orig_pte, old_page, 0, 0);
+ goto copy;
}
+ /*
+ * Ok, we've got the only map reference, and the only
+ * page count reference, and the page is locked,
+ * it's dark out, and we're wearing sunglasses. Hit it.
+ */
unlock_page(old_page);
+ wp_page_reuse(mm, vma, address, page_table, ptl,
+ orig_pte, old_page, 0, 0);
+ return VM_FAULT_WRITE;
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
return wp_page_shared(mm, vma, address, page_table, pmd,
ptl, orig_pte, old_page);
}
-
+copy:
/*
* Ok, we need to copy. Oh, well..
*/
diff --git a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
index 487aaf2390c0..5f4225e675ad 100644
--- a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
+++ b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2012-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -995,6 +996,14 @@ static int msm_pcm_capture_copy(struct snd_pcm_substream *substream,
xfer = size;
offset = prtd->in_frame_info[idx].offset;
pr_debug("Offset value = %d\n", offset);
+
+ if (offset >= size) {
+ pr_err("%s: Invalid dsp buf offset\n", __func__);
+ ret = -EFAULT;
+ q6asm_cpu_buf_release(OUT, prtd->audio_client);
+ goto fail;
+ }
+
if (copy_to_user(buf, bufptr+offset, xfer)) {
pr_err("Failed to copy buf to user\n");
ret = -EFAULT;