path: root/kernel
author     Linux Build Service Account <lnxbuild@localhost>  2016-11-19 05:39:11 -0700
committer  Linux Build Service Account <lnxbuild@localhost>  2016-11-19 05:39:11 -0700
commit     3c45c2a8a2a07a76e2d129d02561d061211e70c8 (patch)
tree       d577a760af70325db6038640d89264ad05b14ca2 /kernel
parent     9adece3859ce8e9723b43dfc722bcc1bafdcfb2e (diff)
parent     599e3b86154aa2d3d367e6326b0ffc6e4f76d020 (diff)
Promotion of kernel.lnx.4.4-161119.
CRs      Change ID                                   Subject
--------------------------------------------------------------------------------------------------------------
1088658  I2f994ae0250ffc8f740ea633324815ae429c74be  msm: ipa3: linearize large skbs
1077102  I09359b528b4742f72a76690930f3d0ed90bb2caa  msm: mdss: move warnings and errors out of mdss spinlock
1089895  I84185558fa6e80b13d7d0078bda9d75143680941  tcp: take care of truncations done by sk_filter()
1091511  Ia151b2dd5229f07790ac961af298305b24e098fb  msm: wlan: update regulatory database
1081957  I24820bd6254002f8a8db9604d230dcbce59b1beb  clk: qcom: Add support to be able to slew PLL
1081738  I10a788726358c56df9bfe11f2332e3823d7cd332  ARM: dts: msm: Enable auto GM for WLED in pmicobalt
1077726  I031ca48f0e0c39f1b2cb51081ecd55b086fb4c9b  msm: mdss: fix pp timeout during transition from LP1 to
1074985  Ib2268181a617c23d62b5b6f857be5327113b2a67  soc: qcom: smem: Redesign smem memory architecture
1090708  I9cda84d1c199b72ce8b9e2997601bcc7430ddbf3  ARM: dts: msm: Update the console uart gpios for msmfalc
1080245  I3b4cf83e776750d993d53331142223109bf0862e  clk: qcom: Add support for debugfs support
1087110  I3694952289c76394af8d40cd89fd2175f49ac127  msm: mdss: Add systrace for readptr_done
1089865  Ia73ab1ba51df7b501d246bb45141018409496d01  ARM: dts: msm: ensure contiguous MSI for PCIe on msmcoba
941978   Idee8691d769218d7e732c9b7f936a2c40946b239  Revert "scsi: ufs: stub UFS shutdown handler"
1091072  I7e9ada5de1f619c6a34a4b2e1764f5e908564ce5  iio: rradc: Update reading USBIN_V channel
1075082  I971e555ec8d02ccf4382e83132a696b065a8ff12  qseecom: improve error checks in qseecom_probe()
1080245  Ib67b3a3409c9e7d8adb710bb524f54f543abf712  clk: add/modify debugfs support for clocks
941978   Id499abc27303bfed72fab4d61abb872bad7d9043  scsi: ufs: error out all issued requests after shutdown
1083537  I73fc02b812f2e6694e2a6aa8bdad2381a5f19406  ASoC: msm: Fix sound card registration failure
1085331  I92e98ab46107fbcfd843898423b41716a204c2ae  ARM: dts: msm: Correct interrupt assignments for msmcoba
1073250  Idc9ca896b3fe6c1c6a72a066a6e453d27a3173e8  Asoc: clean up bootup errors
1091147  I30b8488a1c19815601e6a1c5bcbdeed53715f8fa  usb: phy: qusb: Make sure QUSB PHY is into proper state
1086292  I6482dc3d21fdc3e570fd53022e2fb9427668d939  msm: mdss: add null check before dereferencing src_fmt
1086292  I4812330453dedacd16dad1d920a2bacc3f67042b  msm: mdss: fix race condition in dsi clk off request
1088709  I21e1c029e6b245cfa26a187b35bb1f6845302484  clk: msm: Add the CLKFLAG_NO_RATE_CACHE flag for MM cloc
1082112  I171c91e700c24ecc213ccda705bbe6188d22a43a  scsi: ufs: fix sleep in atomic context
1091354  I9f928f0aad6af346de43965755beb039e422047a  Revert "defconfig: msm: avoid compilation of MDSS DP dri
1090727  I78d2c27743d30b90a96e3d8df60859f67db7ddb8  ARM: dts: msm: Add ufs regulators for msmfalcon interpos
1090029  I66f6de42b106fa2027285e7393b6f9fc143d00d8  leds: qpnp-flash: Fix the mask in the flash prepare API
1089181  I4a382915a6c3a6b9d445ec1f5d57fb499a011f1a  driver: thermal: msm_thermal: Enable Reliability algorit
1079438  Ib14c5b9121190dded5071ff60ecf0be8e5e5c232  ARM: dts: msm: Add physical dimensions for NT35597 panel
1060212  Iabe79bae5f9471c3c6128ed21efd04de00739daa  leds: qpnp-flash-v2: Add support for thermal derate feat
1091127  I7220ad565212c325514301e4c59415b807deb99a  ARM: dts: msm: Add gladiator support on msmfalcon and ms
1091440  I0eb8b9a357f172984612175d1b03dd872df91b6f  diag: Call diagmem_exit only if the mempool is initializ
1090076  Ia85688854f26fe871d5c1253c2d51d75d84deb8f  ARM: dts: msm: Add dummy regulator for LCDB bias
1064071  Ic0dedbad372fd9029b932dd99633a650049751ed  msm: kgsl: Fix pagetable member of struct kgsl_memdesc
1083537  I3d2765535793d6ef9153cfcab4b44a9adad67e15  ASoC: msm: Add support for USB/WCN/TDM Audio
1091141  I6ce48512df5973bf8a2a3081a3a6f8759aeb499f  ARM: dts: msm: Set USB core clock rate for USB2/USB3 for
1060212  Ie7a94f59e58b8f1b0816afda2496449694629205  leds: qpnp-flash-v2: add support to read pmic revid
1080701  If08ff46e72d537254e90707f28c849a86f262853  ARM: dts: msm: specify I2C configuration for msmfalcon
1079442  I822d6280b301b2db6194c845098c935e612ca61c  ASoC: wcd934x: Fix adie loopback through sidetone src pa
1089895  Idc52737bc96097a9220dfe47bb76e94ff1026a05  rose: limit sk_filter trim to payload
1091147  Ibfecfe1846d02b959bd249acac3fe4c57b88aaf0  USB: phy: qusb: Turn on vdd along with 1p8/3p3 LDOs when
1090701  I0e06be169edc2eb1d35ef7fc6c41ff1809aebd03  pinctrl: qcom: msmfalcon: Update gpios as per latest gpi
1086292  I422d53d008223a9b0520f499e629f681bb6afa05  mdss: mdp: avoid panic if recovery handler is uninitiali
1060212  I42503ccd2b2dcc62c5c868132d202b9698c9d216  leds: qpnp-flash-v2: change from dev_*() to pr_*() for l
1090076  Ie828c8568ef09c89cff157d16d3cb322647b6f6e  ARM: dts: msm: enable mdss power supplies for falcon tra
1074879  I8d224a70cbef162f27078b62b73acaa22670861d  sched/hmp: Enhance co-location and scheduler boost featu
1087471  I15323e3ef91401142d3841db59c18fd8fee753fd  sched: Remove thread group iteration from colocation
1085170  Ie23d473302d7fbda9b243a150e5c52d025007e4f  usb: pd: Stop processing SVDM if handler found
1091540  I61523188f45daca026b90943c845b43a8327f51e  qcom-charger: smb2: Disable try.SINK mode in the probe
1081738  Iee99e9d1b999c84ece075d2f17e9cdf6aef9a2ac  leds: qpnp-wled: Add support to configure AUTO_GM settin
1081922  I9aa7a000e75b50c6b26970deaba2131c87087b8c  msm: mdss: fix autorefresh disable during handoff
1075694  I9cf2f94892bdeb83fab0068902419b1603520364  msm: kgsl: preserve ISENSE registers across GPU power co
1085321 1085649  I3c9422f3a790c0c1633ab64d4213a088faaeb9e5  diag: Set the diag write buffers to busy state on channe
1090311  I96cdcb9e3642906b4afa08d9bde07e123d9b3977  USB: Allow skipping device resume during system resume
1074879  I470bcd0588e038b4a540d337fe6a412f2fa74920  sched: revise boost logic when boost_type is SCHED_BOOST
1087020  I6f9b7a630158355a7f920dcf9cfffe537b1c6a85  ASoC: msm: q6dspv2: fix potentional information leak
1089062  Icb04f6175b66fa46405e77d10fddf06b0051ee5f  phy: qcom-ufs: update ufs phy 1-lane settings
1082590  I4cdcbd31b5fa5ceac0eea7c743ea9286f231b80b  scsi: ufs: handle LINERESET during hibern8
1081738  I964b3452d0cdb3618b4ab446655ae75fa3a1049d  leds: qpnp-wled: Add support to configure auto PFM for p
1080245  I936496e553bc958c10e743fd8a225ffc7fbc0f79  clk: Add support to allow client to print all enabled cl
1079373  Ifd7b2b88e7ab4c952b743fede6e24795069d653a  qcom-charger: WA for legacy bit set on hard reboot
1090518  I7f1c0d9d84607821893a1e5d17934dae5acef5f4  clk: qcom: Add support for RCGs with dynamic and fixed s
1089865  I1e74f1b03c3e15880efdac7ff07aca2f628de99d  ARM: dts: msm: enable QGIC MSI for PCIe on msmcobalt
1088059  I66cbe48b7f4910228a6af57610a8427fea7fd1f2  msm: mdss: fix incorrect mutex unlocking during NOTIFY_U
1087418  Ia3fb69dca00654dacd8d1faae34715e40e097480  scsi: ufs: enable auto hibern8 only after device initial
1088216  I326eceeddff8e77d346c3365fa46cd539324451f  ARM: dts: msm: Add support for USB device for msmfalcon
1060212  Iafb7915e196a18b5f8076dda8fb06a4bd71a8e6e  leds: qpnp-flash-v2: Add support for configuring OTST th
1086372  Ia03380dfa4852c80fedb38f3c79f55d8d1a9a7f6  icnss: Reset mpm_wcssaon_config bits before top level re
1080245  I0a202af6f46c7cf164036d65487db5c40aab4063  clk: Add support for list_rates ops for clocks
1091477  I7435f05f20e12a7704ae5d9597b5cdc9b5a61d00  qcom-charger: Change usb_icl votable for PD vote
1089062  Ief5df61d91fbd765c595533b3380a602a2540e5e  scsi: ufs-qcom: update clock scaling sequence
1085217  I62de66e9b0bb1eeeac3c94d1ac1037285811b631  msm: ipa3: header file change for wdi-stats
1080674  I15ef73049cee76c6ea5b3916d9281bbd9fdfc563  ARM: dts: msm: specify UART configuration on msmfalcon.
1090525  I48c50bc320425c0db40cd4865e05c6b7a7fb5da3  msm: sde: remove secure camera ctrl_id definition
1061507  Iad71abbed72aa40b5c839260f5c297a885f7d128  ASoC: wcd-mbhc: correct cross connection check
1085064  Ib53902459646e590df4dc7fcb00f833d5e8f41ed  usb: pd: Don't suspend charging unless changing voltages
1064071  Ic0dedbad661143977a226d50263c26b5af579ce3  msm: kgsl: Make sure USE_CPU_MAP + MAP_USER_MEM work tog
1090862 987021  I0d1797a4df9ff67f3b162a1b5d26320ca989f54a  msm: mdss: hide additional kernel addresses from unprivi

Change-Id: Ic6272ada932975c2562cb87d4a617520002db3d3
CRs-Fixed: 1082112, 1075694, 1091440, 1085331, 1089062, 1081922, 1089895, 1077726, 1090029, 1061507, 1091354, 1074879, 987021, 1086292, 1085217, 1087020, 1080245, 1088709, 1089181, 1085064, 1087471, 1088059, 1080674, 1090862, 1079442, 1087418, 1090727, 1085649, 1064071, 1081738, 1086372, 941978, 1090518, 1090708, 1077102, 1090076, 1085321, 1091477, 1090701, 1090311, 1091511, 1091141, 1074985, 1079438, 1091147, 1075082, 1091127, 1087110, 1082590, 1081957, 1090525, 1085170, 1088658, 1080701, 1083537, 1091540, 1088216, 1079373, 1060212, 1073250, 1089865, 1091072
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/Makefile      2
-rw-r--r--  kernel/sched/boost.c     226
-rw-r--r--  kernel/sched/core.c        3
-rw-r--r--  kernel/sched/fair.c      153
-rw-r--r--  kernel/sched/hmp.c       512
-rw-r--r--  kernel/sched/rt.c         12
-rw-r--r--  kernel/sched/sched.h      47
-rw-r--r--  kernel/sched/tune.c      184
-rw-r--r--  kernel/sysctl.c           19
9 files changed, 783 insertions, 375 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7d0d34c53e08..7c0382a3eace 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -15,7 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o sched_avg.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
-obj-$(CONFIG_SCHED_HMP) += hmp.o
+obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/boost.c b/kernel/sched/boost.c
new file mode 100644
index 000000000000..fcfda385b74a
--- /dev/null
+++ b/kernel/sched/boost.c
@@ -0,0 +1,226 @@
+/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "sched.h"
+#include <linux/of.h>
+#include <linux/sched/core_ctl.h>
+#include <trace/events/sched.h>
+
+/*
+ * Scheduler boost is a mechanism to temporarily place tasks on CPUs
+ * with higher capacity than those where they would normally have
+ * ended up given their load characteristics. Any entity enabling
+ * boost is responsible for disabling it as well.
+ */
+
+unsigned int sysctl_sched_boost;
+static enum sched_boost_policy boost_policy;
+static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE;
+static DEFINE_MUTEX(boost_mutex);
+static unsigned int freq_aggr_threshold_backup;
+
+static inline void boost_kick(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
+ smp_send_reschedule(cpu);
+}
+
+static void boost_kick_cpus(void)
+{
+ int i;
+ struct cpumask kick_mask;
+
+ if (boost_policy != SCHED_BOOST_ON_BIG)
+ return;
+
+ cpumask_andnot(&kick_mask, cpu_online_mask, cpu_isolated_mask);
+
+ for_each_cpu(i, &kick_mask) {
+ if (cpu_capacity(i) != max_capacity)
+ boost_kick(i);
+ }
+}
+
+int got_boost_kick(void)
+{
+ int cpu = smp_processor_id();
+ struct rq *rq = cpu_rq(cpu);
+
+ return test_bit(BOOST_KICK, &rq->hmp_flags);
+}
+
+void clear_boost_kick(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ clear_bit(BOOST_KICK, &rq->hmp_flags);
+}
+
+/*
+ * Scheduler boost type and boost policy might at first seem unrelated;
+ * however, there is a connection between them that allows us to use
+ * them interchangeably during placement decisions. We explain the
+ * connection here in one possible way so that the implications are
+ * clear when looking at placement policies.
+ *
+ * When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
+ * When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
+ * neither be none nor RESTRAINED.
+ */
+static void set_boost_policy(int type)
+{
+ if (type == SCHED_BOOST_NONE || type == RESTRAINED_BOOST) {
+ boost_policy = SCHED_BOOST_NONE;
+ return;
+ }
+
+ if (boost_policy_dt) {
+ boost_policy = boost_policy_dt;
+ return;
+ }
+
+ if (min_possible_efficiency != max_possible_efficiency) {
+ boost_policy = SCHED_BOOST_ON_BIG;
+ return;
+ }
+
+ boost_policy = SCHED_BOOST_ON_ALL;
+}
+
+enum sched_boost_policy sched_boost_policy(void)
+{
+ return boost_policy;
+}
+
+static bool verify_boost_params(int old_val, int new_val)
+{
+ /*
+ * Boost can only be turned on or off. There is no possibility of
+ * switching from one boost type to another or of setting the same
+ * kind of boost several times.
+ */
+ return !(!!old_val == !!new_val);
+}
+
+static void _sched_set_boost(int old_val, int type)
+{
+ switch (type) {
+ case NO_BOOST:
+ if (old_val == FULL_THROTTLE_BOOST)
+ core_ctl_set_boost(false);
+ else if (old_val == CONSERVATIVE_BOOST)
+ restore_cgroup_boost_settings();
+ else
+ update_freq_aggregate_threshold(
+ freq_aggr_threshold_backup);
+ break;
+
+ case FULL_THROTTLE_BOOST:
+ core_ctl_set_boost(true);
+ boost_kick_cpus();
+ break;
+
+ case CONSERVATIVE_BOOST:
+ update_cgroup_boost_settings();
+ boost_kick_cpus();
+ break;
+
+ case RESTRAINED_BOOST:
+ freq_aggr_threshold_backup =
+ update_freq_aggregate_threshold(1);
+ break;
+
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ set_boost_policy(type);
+ sysctl_sched_boost = type;
+ trace_sched_set_boost(type);
+}
+
+void sched_boost_parse_dt(void)
+{
+ struct device_node *sn;
+ const char *boost_policy;
+
+ if (!sched_enable_hmp)
+ return;
+
+ sn = of_find_node_by_path("/sched-hmp");
+ if (!sn)
+ return;
+
+ if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
+ if (!strcmp(boost_policy, "boost-on-big"))
+ boost_policy_dt = SCHED_BOOST_ON_BIG;
+ else if (!strcmp(boost_policy, "boost-on-all"))
+ boost_policy_dt = SCHED_BOOST_ON_ALL;
+ }
+}
+
+int sched_set_boost(int type)
+{
+ int ret = 0;
+
+ if (!sched_enable_hmp)
+ return -EINVAL;
+
+ mutex_lock(&boost_mutex);
+
+ if (verify_boost_params(sysctl_sched_boost, type))
+ _sched_set_boost(sysctl_sched_boost, type);
+ else
+ ret = -EINVAL;
+
+ mutex_unlock(&boost_mutex);
+ return ret;
+}
+
+int sched_boost_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned int *data = (unsigned int *)table->data;
+ unsigned int old_val;
+
+ if (!sched_enable_hmp)
+ return -EINVAL;
+
+ mutex_lock(&boost_mutex);
+
+ old_val = *data;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (ret || !write)
+ goto done;
+
+ if (verify_boost_params(old_val, *data)) {
+ _sched_set_boost(old_val, *data);
+ } else {
+ *data = old_val;
+ ret = -EINVAL;
+ }
+
+done:
+ mutex_unlock(&boost_mutex);
+ return ret;
+}
+
+int sched_boost(void)
+{
+ return sysctl_sched_boost;
+}
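
The boost.c rework above replaces the old refcount-based sched_boost with four boost classes selected through the existing sched_boost sysctl (see the kernel/sysctl.c hunk at the end of this patch). A minimal userspace sketch of driving it, assuming the knob is exposed as /proc/sys/kernel/sched_boost (the path is an assumption; the values 0..3 and the on/off-only transition rule come from the code above):

/*
 * Userspace sketch: the values 0..3 mirror NO_BOOST, FULL_THROTTLE_BOOST,
 * CONSERVATIVE_BOOST and RESTRAINED_BOOST; the sysctl path is assumed.
 */
#include <stdio.h>

static int set_sched_boost(int type)
{
	FILE *f = fopen("/proc/sys/kernel/sched_boost", "w");

	if (!f)
		return -1;

	/*
	 * verify_boost_params() only accepts off->on and on->off
	 * transitions, so switching directly between boost classes
	 * (e.g. 1 -> 2) is rejected with -EINVAL.
	 */
	if (fprintf(f, "%d\n", type) < 0) {
		fclose(f);
		return -1;
	}

	return fclose(f);
}

int main(void)
{
	if (set_sched_boost(1))		/* FULL_THROTTLE_BOOST */
		perror("sched_boost");

	/* ... latency-critical phase runs boosted here ... */

	if (set_sched_boost(0))		/* NO_BOOST: the enabler disables it */
		perror("sched_boost");

	return 0;
}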
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84563da000cf..a5d101e8a5f2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7846,7 +7846,6 @@ void __init sched_init_smp(void)
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
update_cluster_topology();
- init_sched_hmp_boost_policy();
init_hrtick();
@@ -7895,7 +7894,7 @@ void __init sched_init(void)
BUG_ON(num_possible_cpus() > BITS_PER_LONG);
- sched_hmp_parse_dt();
+ sched_boost_parse_dt();
init_clusters();
#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1674b1054f83..3db77aff2433 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2596,6 +2596,7 @@ static u32 __compute_runnable_contrib(u64 n)
#define SBC_FLAG_COLOC_CLUSTER 0x10000
#define SBC_FLAG_WAKER_CLUSTER 0x20000
#define SBC_FLAG_BACKUP_CLUSTER 0x40000
+#define SBC_FLAG_BOOST_CLUSTER 0x80000
struct cpu_select_env {
struct task_struct *p;
@@ -2605,7 +2606,7 @@ struct cpu_select_env {
u8 need_waker_cluster:1;
u8 sync:1;
u8 ignore_prev_cpu:1;
- enum sched_boost_type boost_type;
+ enum sched_boost_policy boost_policy;
int prev_cpu;
DECLARE_BITMAP(candidate_list, NR_CPUS);
DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2705,10 +2706,38 @@ select_least_power_cluster(struct cpu_select_env *env)
struct sched_cluster *cluster;
if (env->rtg) {
- env->task_load = scale_load_to_cpu(task_load(env->p),
- cluster_first_cpu(env->rtg->preferred_cluster));
- env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
- return env->rtg->preferred_cluster;
+ int cpu = cluster_first_cpu(env->rtg->preferred_cluster);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p), cpu);
+
+ if (task_load_will_fit(env->p, env->task_load,
+ cpu, env->boost_policy)) {
+ env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
+
+ if (env->boost_policy == SCHED_BOOST_NONE)
+ return env->rtg->preferred_cluster;
+
+ for_each_sched_cluster(cluster) {
+ if (cluster != env->rtg->preferred_cluster) {
+ __set_bit(cluster->id,
+ env->backup_list);
+ __clear_bit(cluster->id,
+ env->candidate_list);
+ }
+ }
+
+ return env->rtg->preferred_cluster;
+ }
+
+ /*
+ * Since the task load does not fit on the preferred
+ * cluster anymore, pretend that the task does not
+ * have any preferred cluster. This allows the waking
+ * task to get the appropriate CPU it needs as per the
+ * non co-location placement policy without having to
+ * wait until the preferred cluster is updated.
+ */
+ env->rtg = NULL;
}
for_each_sched_cluster(cluster) {
@@ -2718,7 +2747,7 @@ select_least_power_cluster(struct cpu_select_env *env)
env->task_load = scale_load_to_cpu(task_load(env->p),
cpu);
if (task_load_will_fit(env->p, env->task_load, cpu,
- env->boost_type))
+ env->boost_policy))
return cluster;
__set_bit(cluster->id, env->backup_list);
@@ -2961,7 +2990,14 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
update_spare_capacity(stats, env, i, c->capacity,
env->cpu_load);
- if (env->boost_type == SCHED_BOOST_ON_ALL ||
+ /*
+ * need_idle takes precedence over sched boost, but when both
+ * are set, the idlest CPU within all the clusters is selected
+ * when boost_policy = BOOST_ON_ALL, whereas the idlest CPU in the
+ * big cluster is selected when boost_policy = BOOST_ON_BIG.
+ */
+ if ((!env->need_idle &&
+ env->boost_policy != SCHED_BOOST_NONE) ||
env->need_waker_cluster ||
sched_cpu_high_irqload(i) ||
spill_threshold_crossed(env, cpu_rq(i)))
@@ -3005,7 +3041,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
struct task_struct *task = env->p;
struct sched_cluster *cluster;
- if (env->boost_type != SCHED_BOOST_NONE || env->reason ||
+ if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
!task->ravg.mark_start ||
env->need_idle || !sched_short_sleep_task_threshold)
return false;
@@ -3034,7 +3070,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
cluster = cpu_rq(prev_cpu)->cluster;
if (!task_load_will_fit(task, env->task_load, prev_cpu,
- sched_boost_type())) {
+ sched_boost_policy())) {
__set_bit(cluster->id, env->backup_list);
__clear_bit(cluster->id, env->candidate_list);
@@ -3056,7 +3092,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
- return env->boost_type == SCHED_BOOST_NONE &&
+ return env->boost_policy == SCHED_BOOST_NONE &&
!env->need_idle && !env->reason && env->sync &&
task_load(current) > sched_big_waker_task_load &&
task_load(env->p) < sched_small_wakee_task_load;
@@ -3098,7 +3134,6 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.reason = reason,
.need_idle = wake_to_idle(p),
.need_waker_cluster = 0,
- .boost_type = sched_boost_type(),
.sync = sync,
.prev_cpu = target,
.ignore_prev_cpu = 0,
@@ -3107,6 +3142,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.sbc_best_cluster_flag = 0,
};
+ env.boost_policy = task_sched_boost(p) ?
+ sched_boost_policy() : SCHED_BOOST_NONE;
+
bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
bitmap_zero(env.backup_list, NR_CPUS);
@@ -3178,12 +3216,23 @@ retry:
sbc_flag |= env.sbc_best_flag;
target = stats.best_cpu;
} else {
- if (env.rtg) {
+ if (env.rtg && env.boost_policy == SCHED_BOOST_NONE) {
env.rtg = NULL;
goto retry;
}
- find_backup_cluster(&env, &stats);
+ /*
+ * With boost_policy == SCHED_BOOST_ON_BIG, we reach here with
+ * backup_list = little cluster, candidate_list = none and
+ * stats->best_capacity_cpu points to the best spare capacity
+ * CPU among the CPUs in the big cluster.
+ */
+ if (env.boost_policy == SCHED_BOOST_ON_BIG &&
+ stats.best_capacity_cpu >= 0)
+ sbc_flag |= SBC_FLAG_BOOST_CLUSTER;
+ else
+ find_backup_cluster(&env, &stats);
+
if (stats.best_capacity_cpu >= 0) {
target = stats.best_capacity_cpu;
sbc_flag |= SBC_FLAG_BEST_CAP_CPU;
@@ -3193,8 +3242,8 @@ retry:
out:
sbc_flag |= env.sbc_best_cluster_flag;
rcu_read_unlock();
- trace_sched_task_load(p, sched_boost(), env.reason, env.sync,
- env.need_idle, sbc_flag, target);
+ trace_sched_task_load(p, sched_boost_policy() && task_sched_boost(p),
+ env.reason, env.sync, env.need_idle, sbc_flag, target);
return target;
}
@@ -3402,11 +3451,9 @@ static inline int migration_needed(struct task_struct *p, int cpu)
if (task_will_be_throttled(p))
return 0;
- if (sched_boost_type() == SCHED_BOOST_ON_BIG) {
- if (cpu_capacity(cpu) != max_capacity)
- return UP_MIGRATION;
- return 0;
- }
+ if (sched_boost_policy() == SCHED_BOOST_ON_BIG &&
+ cpu_capacity(cpu) != max_capacity && task_sched_boost(p))
+ return UP_MIGRATION;
if (sched_cpu_high_irqload(cpu))
return IRQLOAD_MIGRATION;
@@ -3420,7 +3467,7 @@ static inline int migration_needed(struct task_struct *p, int cpu)
return DOWN_MIGRATION;
}
- if (!grp && !task_will_fit(p, cpu)) {
+ if (!task_will_fit(p, cpu)) {
rcu_read_unlock();
return UP_MIGRATION;
}
@@ -6648,10 +6695,7 @@ enum fbq_type { regular, remote, all };
#define LBF_NEED_BREAK 0x02
#define LBF_DST_PINNED 0x04
#define LBF_SOME_PINNED 0x08
-#define LBF_SCHED_BOOST_ACTIVE_BALANCE 0x40
#define LBF_BIG_TASK_ACTIVE_BALANCE 0x80
-#define LBF_HMP_ACTIVE_BALANCE (LBF_SCHED_BOOST_ACTIVE_BALANCE | \
- LBF_BIG_TASK_ACTIVE_BALANCE)
#define LBF_IGNORE_BIG_TASKS 0x100
#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400
@@ -6682,6 +6726,7 @@ struct lb_env {
enum fbq_type fbq_type;
struct list_head tasks;
+ enum sched_boost_policy boost_policy;
};
/*
@@ -6826,9 +6871,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;
- if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu) &&
- nr_big_tasks(env->src_rq) && !is_big_task(p))
- return 0;
+ if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu)) {
+ if (nr_big_tasks(env->src_rq) && !is_big_task(p))
+ return 0;
+
+ if (env->boost_policy == SCHED_BOOST_ON_BIG &&
+ !task_sched_boost(p))
+ return 0;
+ }
twf = task_will_fit(p, env->dst_cpu);
@@ -6951,8 +7001,7 @@ static int detach_tasks(struct lb_env *env)
if (env->imbalance <= 0)
return 0;
- if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu) &&
- !sched_boost())
+ if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu))
env->flags |= LBF_IGNORE_BIG_TASKS;
else if (!same_cluster(env->dst_cpu, env->src_cpu))
env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS;
@@ -7255,8 +7304,10 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds)
int local_capacity, busiest_capacity;
int local_pwr_cost, busiest_pwr_cost;
int nr_cpus;
+ int boost = sched_boost();
- if (!sysctl_sched_restrict_cluster_spill || sched_boost())
+ if (!sysctl_sched_restrict_cluster_spill ||
+ boost == FULL_THROTTLE_BOOST || boost == CONSERVATIVE_BOOST)
return 0;
local_cpu = group_first_cpu(sds->local);
@@ -7628,11 +7679,6 @@ static bool update_sd_pick_busiest_active_balance(struct lb_env *env,
{
if (env->idle != CPU_NOT_IDLE &&
cpu_capacity(env->dst_cpu) > group_rq_capacity(sg)) {
- if (sched_boost() && !sds->busiest && sgs->sum_nr_running) {
- env->flags |= LBF_SCHED_BOOST_ACTIVE_BALANCE;
- return true;
- }
-
if (sgs->sum_nr_big_tasks >
sds->busiest_stat.sum_nr_big_tasks) {
env->flags |= LBF_BIG_TASK_ACTIVE_BALANCE;
@@ -8045,7 +8091,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
if (!sds.busiest || busiest->sum_nr_running == 0)
goto out_balanced;
- if (env->flags & LBF_HMP_ACTIVE_BALANCE)
+ if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
goto force_balance;
if (bail_inter_cluster_balance(env, &sds))
@@ -8257,7 +8303,7 @@ static int need_active_balance(struct lb_env *env)
{
struct sched_domain *sd = env->sd;
- if (env->flags & LBF_HMP_ACTIVE_BALANCE)
+ if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
return 1;
if (env->idle == CPU_NEWLY_IDLE) {
@@ -8348,20 +8394,21 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
struct lb_env env = {
- .sd = sd,
- .dst_cpu = this_cpu,
- .dst_rq = this_rq,
- .dst_grpmask = sched_group_cpus(sd->groups),
- .idle = idle,
- .loop_break = sched_nr_migrate_break,
- .cpus = cpus,
- .fbq_type = all,
- .tasks = LIST_HEAD_INIT(env.tasks),
- .imbalance = 0,
- .flags = 0,
- .loop = 0,
+ .sd = sd,
+ .dst_cpu = this_cpu,
+ .dst_rq = this_rq,
+ .dst_grpmask = sched_group_cpus(sd->groups),
+ .idle = idle,
+ .loop_break = sched_nr_migrate_break,
+ .cpus = cpus,
+ .fbq_type = all,
+ .tasks = LIST_HEAD_INIT(env.tasks),
+ .imbalance = 0,
+ .flags = 0,
+ .loop = 0,
.busiest_nr_running = 0,
.busiest_grp_capacity = 0,
+ .boost_policy = sched_boost_policy(),
};
/*
@@ -8510,7 +8557,7 @@ more_balance:
no_move:
if (!ld_moved) {
- if (!(env.flags & LBF_HMP_ACTIVE_BALANCE))
+ if (!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
schedstat_inc(sd, lb_failed[idle]);
/*
@@ -8520,7 +8567,7 @@ no_move:
* excessive cache_hot migrations and active balances.
*/
if (idle != CPU_NEWLY_IDLE &&
- !(env.flags & LBF_HMP_ACTIVE_BALANCE))
+ !(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
sd->nr_balance_failed++;
if (need_active_balance(&env)) {
@@ -8797,6 +8844,7 @@ static int active_load_balance_cpu_stop(void *data)
.busiest_grp_capacity = 0,
.flags = 0,
.loop = 0,
+ .boost_policy = sched_boost_policy(),
};
bool moved = false;
@@ -9272,7 +9320,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
if (rq->nr_running < 2)
return 0;
- if (!sysctl_sched_restrict_cluster_spill || sched_boost())
+ if (!sysctl_sched_restrict_cluster_spill ||
+ sched_boost_policy() == SCHED_BOOST_ON_ALL)
return 1;
if (cpu_max_power_cost(cpu) == max_power_cost)
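
The fair.c changes above gate the global boost policy per task: select_best_cpu() only applies it to tasks whose schedtune group has sched boost enabled. A small standalone sketch of that derivation (userspace illustration only; the enum mirrors sched.h, the rest is hypothetical):

#include <stdio.h>
#include <stdbool.h>

enum sched_boost_policy {
	SCHED_BOOST_NONE,
	SCHED_BOOST_ON_BIG,
	SCHED_BOOST_ON_ALL,
};

/* Mirrors task_sched_boost(p) ? sched_boost_policy() : SCHED_BOOST_NONE */
static enum sched_boost_policy
effective_policy(enum sched_boost_policy global, bool task_boost_enabled)
{
	return task_boost_enabled ? global : SCHED_BOOST_NONE;
}

int main(void)
{
	/* Boosted group during boost-on-big: biased toward the big cluster. */
	printf("boosted task:   policy %d\n",
	       effective_policy(SCHED_BOOST_ON_BIG, true));

	/* Non-boosted group: keeps the normal load-based placement. */
	printf("unboosted task: policy %d\n",
	       effective_policy(SCHED_BOOST_ON_BIG, false));

	return 0;
}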
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 30391aae0822..968a41e0e81e 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -17,8 +17,6 @@
#include <linux/cpufreq.h>
#include <linux/list_sort.h>
#include <linux/syscore_ops.h>
-#include <linux/of.h>
-#include <linux/sched/core_ctl.h>
#include "sched.h"
@@ -231,52 +229,6 @@ fail:
return ret;
}
-/*
- * It is possible that CPUs of the same micro architecture can have slight
- * difference in the efficiency due to other factors like cache size. The
- * BOOST_ON_BIG policy may not be optimial for such systems. The required
- * boost policy can be specified via device tree to handle this.
- */
-static int __read_mostly sched_boost_policy = SCHED_BOOST_NONE;
-
-/*
- * This should be called after clusters are populated and
- * the respective efficiency values are initialized.
- */
-void init_sched_hmp_boost_policy(void)
-{
- /*
- * Initialize the boost type here if it is not passed from
- * device tree.
- */
- if (sched_boost_policy == SCHED_BOOST_NONE) {
- if (max_possible_efficiency != min_possible_efficiency)
- sched_boost_policy = SCHED_BOOST_ON_BIG;
- else
- sched_boost_policy = SCHED_BOOST_ON_ALL;
- }
-}
-
-void sched_hmp_parse_dt(void)
-{
- struct device_node *sn;
- const char *boost_policy;
-
- if (!sched_enable_hmp)
- return;
-
- sn = of_find_node_by_path("/sched-hmp");
- if (!sn)
- return;
-
- if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
- if (!strcmp(boost_policy, "boost-on-big"))
- sched_boost_policy = SCHED_BOOST_ON_BIG;
- else if (!strcmp(boost_policy, "boost-on-all"))
- sched_boost_policy = SCHED_BOOST_ON_ALL;
- }
-}
-
unsigned int max_possible_efficiency = 1;
unsigned int min_possible_efficiency = UINT_MAX;
@@ -680,29 +632,6 @@ int __init set_sched_enable_hmp(char *str)
early_param("sched_enable_hmp", set_sched_enable_hmp);
-int got_boost_kick(void)
-{
- int cpu = smp_processor_id();
- struct rq *rq = cpu_rq(cpu);
-
- return test_bit(BOOST_KICK, &rq->hmp_flags);
-}
-
-inline void clear_boost_kick(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
-
- clear_bit(BOOST_KICK, &rq->hmp_flags);
-}
-
-inline void boost_kick(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
-
- if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
- smp_send_reschedule(cpu);
-}
-
/* Clear any HMP scheduler related requests pending from or on cpu */
void clear_hmp_request(int cpu)
{
@@ -840,6 +769,9 @@ min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
+/* Maximum allowed threshold before freq aggregation must be enabled */
+#define MAX_FREQ_AGGR_THRESH 1000
+
/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;
@@ -919,8 +851,8 @@ static const unsigned int top_tasks_bitmap_size =
* C1 busy time = 5 + 5 + 6 = 16ms
*
*/
-static __read_mostly unsigned int sched_freq_aggregate;
-__read_mostly unsigned int sysctl_sched_freq_aggregate;
+static __read_mostly unsigned int sched_freq_aggregate = 1;
+__read_mostly unsigned int sysctl_sched_freq_aggregate = 1;
unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct;
static unsigned int __read_mostly sched_freq_aggregate_threshold;
@@ -937,14 +869,6 @@ unsigned int max_task_load(void)
/* Use this knob to turn on or off HMP-aware task placement logic */
unsigned int __read_mostly sched_enable_hmp;
-/*
- * Scheduler boost is a mechanism to temporarily place tasks on CPUs
- * with higher capacity than those where a task would have normally
- * ended up with their load characteristics. Any entity enabling
- * boost is responsible for disabling it as well.
- */
-unsigned int sysctl_sched_boost;
-
/* A cpu can no longer accommodate more tasks if:
*
* rq->nr_running > sysctl_sched_spill_nr_run ||
@@ -996,6 +920,21 @@ unsigned int __read_mostly sched_downmigrate;
unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60;
/*
+ * Task groups whose aggregate demand on a cpu is more than
+ * sched_group_upmigrate need to be up-migrated if possible.
+ */
+unsigned int __read_mostly sched_group_upmigrate;
+unsigned int __read_mostly sysctl_sched_group_upmigrate_pct = 100;
+
+/*
+ * Task groups, once up-migrated, will need to drop their aggregate
+ * demand to less than sched_group_downmigrate before they are "down"
+ * migrated.
+ */
+unsigned int __read_mostly sched_group_downmigrate;
+unsigned int __read_mostly sysctl_sched_group_downmigrate_pct = 95;
+
+/*
* The load scale factor of a CPU gets boosted when its max frequency
* is restricted due to which the tasks are migrating to higher capacity
* CPUs early. The sched_upmigrate threshold is auto-upgraded by
@@ -1017,33 +956,46 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
-void update_up_down_migrate(void)
+static void
+_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
{
- unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
- unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
unsigned int delta;
if (up_down_migrate_scale_factor == 1024)
- goto done;
+ return;
- delta = up_migrate - down_migrate;
+ delta = *up_migrate - *down_migrate;
- up_migrate /= NSEC_PER_USEC;
- up_migrate *= up_down_migrate_scale_factor;
- up_migrate >>= 10;
- up_migrate *= NSEC_PER_USEC;
+ *up_migrate /= NSEC_PER_USEC;
+ *up_migrate *= up_down_migrate_scale_factor;
+ *up_migrate >>= 10;
+ *up_migrate *= NSEC_PER_USEC;
- up_migrate = min(up_migrate, sched_ravg_window);
+ *up_migrate = min(*up_migrate, sched_ravg_window);
- down_migrate /= NSEC_PER_USEC;
- down_migrate *= up_down_migrate_scale_factor;
- down_migrate >>= 10;
- down_migrate *= NSEC_PER_USEC;
+ *down_migrate /= NSEC_PER_USEC;
+ *down_migrate *= up_down_migrate_scale_factor;
+ *down_migrate >>= 10;
+ *down_migrate *= NSEC_PER_USEC;
- down_migrate = min(down_migrate, up_migrate - delta);
-done:
+ *down_migrate = min(*down_migrate, *up_migrate - delta);
+}
+
+static void update_up_down_migrate(void)
+{
+ unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
+ unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
+
+ _update_up_down_migrate(&up_migrate, &down_migrate);
sched_upmigrate = up_migrate;
sched_downmigrate = down_migrate;
+
+ up_migrate = pct_to_real(sysctl_sched_group_upmigrate_pct);
+ down_migrate = pct_to_real(sysctl_sched_group_downmigrate_pct);
+
+ _update_up_down_migrate(&up_migrate, &down_migrate);
+ sched_group_upmigrate = up_migrate;
+ sched_group_downmigrate = down_migrate;
}
void set_hmp_defaults(void)
@@ -1134,82 +1086,6 @@ u64 cpu_load_sync(int cpu, int sync)
return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}
-static int boost_refcount;
-static DEFINE_SPINLOCK(boost_lock);
-static DEFINE_MUTEX(boost_mutex);
-
-static void boost_kick_cpus(void)
-{
- int i;
-
- for_each_online_cpu(i) {
- if (cpu_capacity(i) != max_capacity)
- boost_kick(i);
- }
-}
-
-int sched_boost(void)
-{
- return boost_refcount > 0;
-}
-
-int sched_set_boost(int enable)
-{
- unsigned long flags;
- int ret = 0;
- int old_refcount;
-
- if (!sched_enable_hmp)
- return -EINVAL;
-
- spin_lock_irqsave(&boost_lock, flags);
-
- old_refcount = boost_refcount;
-
- if (enable == 1) {
- boost_refcount++;
- } else if (!enable) {
- if (boost_refcount >= 1)
- boost_refcount--;
- else
- ret = -EINVAL;
- } else {
- ret = -EINVAL;
- }
-
- if (!old_refcount && boost_refcount)
- boost_kick_cpus();
-
- if (boost_refcount <= 1)
- core_ctl_set_boost(boost_refcount == 1);
- trace_sched_set_boost(boost_refcount);
- spin_unlock_irqrestore(&boost_lock, flags);
-
- return ret;
-}
-
-int sched_boost_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int ret;
-
- mutex_lock(&boost_mutex);
- if (!write)
- sysctl_sched_boost = sched_boost();
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret || !write)
- goto done;
-
- ret = (sysctl_sched_boost <= 1) ?
- sched_set_boost(sysctl_sched_boost) : -EINVAL;
-
-done:
- mutex_unlock(&boost_mutex);
- return ret;
-}
-
/*
* Task will fit on a cpu if it's bandwidth consumption on that cpu
* will be less than sched_upmigrate. A big task that was previously
@@ -1219,60 +1095,63 @@ done:
* tasks with load close to the upmigrate threshold
*/
int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
- enum sched_boost_type boost_type)
+ enum sched_boost_policy boost_policy)
{
- int upmigrate;
+ int upmigrate = sched_upmigrate;
if (cpu_capacity(cpu) == max_capacity)
return 1;
- if (boost_type != SCHED_BOOST_ON_BIG) {
+ if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
+ upmigrate = sched_downmigrate;
+
+ if (boost_policy != SCHED_BOOST_ON_BIG) {
if (task_nice(p) > SCHED_UPMIGRATE_MIN_NICE ||
upmigrate_discouraged(p))
return 1;
- upmigrate = sched_upmigrate;
- if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
- upmigrate = sched_downmigrate;
-
if (task_load < upmigrate)
return 1;
+ } else {
+ if (task_sched_boost(p) || task_load >= upmigrate)
+ return 0;
+
+ return 1;
}
return 0;
}
-enum sched_boost_type sched_boost_type(void)
-{
- if (sched_boost())
- return sched_boost_policy;
-
- return SCHED_BOOST_NONE;
-}
-
int task_will_fit(struct task_struct *p, int cpu)
{
u64 tload = scale_load_to_cpu(task_load(p), cpu);
- return task_load_will_fit(p, tload, cpu, sched_boost_type());
+ return task_load_will_fit(p, tload, cpu, sched_boost_policy());
}
-int group_will_fit(struct sched_cluster *cluster,
- struct related_thread_group *grp, u64 demand)
+static int
+group_will_fit(struct sched_cluster *cluster, struct related_thread_group *grp,
+ u64 demand, bool group_boost)
{
int cpu = cluster_first_cpu(cluster);
int prev_capacity = 0;
- unsigned int threshold = sched_upmigrate;
+ unsigned int threshold = sched_group_upmigrate;
u64 load;
if (cluster->capacity == max_capacity)
return 1;
+ if (group_boost)
+ return 0;
+
+ if (!demand)
+ return 1;
+
if (grp->preferred_cluster)
prev_capacity = grp->preferred_cluster->capacity;
if (cluster->capacity < prev_capacity)
- threshold = sched_downmigrate;
+ threshold = sched_group_downmigrate;
load = scale_load_to_cpu(demand, cpu);
if (load < threshold)
@@ -1495,6 +1374,23 @@ void post_big_task_count_change(const struct cpumask *cpus)
DEFINE_MUTEX(policy_mutex);
+unsigned int update_freq_aggregate_threshold(unsigned int threshold)
+{
+ unsigned int old_threshold;
+
+ mutex_lock(&policy_mutex);
+
+ old_threshold = sysctl_sched_freq_aggregate_threshold_pct;
+
+ sysctl_sched_freq_aggregate_threshold_pct = threshold;
+ sched_freq_aggregate_threshold =
+ pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
+
+ mutex_unlock(&policy_mutex);
+
+ return old_threshold;
+}
+
static inline int invalid_value_freq_input(unsigned int *data)
{
if (data == &sysctl_sched_freq_aggregate)
@@ -1578,7 +1474,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
if (write && (old_val == *data))
goto done;
- if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) {
+ if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct ||
+ sysctl_sched_group_downmigrate_pct >
+ sysctl_sched_group_upmigrate_pct) {
*data = old_val;
ret = -EINVAL;
goto done;
@@ -3110,37 +3008,9 @@ static void reset_all_task_stats(void)
{
struct task_struct *g, *p;
- read_lock(&tasklist_lock);
do_each_thread(g, p) {
- raw_spin_lock_irq(&p->pi_lock);
reset_task_stats(p);
- raw_spin_unlock_irq(&p->pi_lock);
} while_each_thread(g, p);
- read_unlock(&tasklist_lock);
-}
-
-static void disable_window_stats(void)
-{
- unsigned long flags;
- int i;
-
- local_irq_save(flags);
- for_each_possible_cpu(i)
- raw_spin_lock(&cpu_rq(i)->lock);
-
- sched_disable_window_stats = 1;
-
- for_each_possible_cpu(i)
- raw_spin_unlock(&cpu_rq(i)->lock);
-
- local_irq_restore(flags);
-}
-
-/* Called with all cpu's rq->lock held */
-static void enable_window_stats(void)
-{
- sched_disable_window_stats = 0;
-
}
enum reset_reason_code {
@@ -3166,17 +3036,22 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
unsigned int old = 0, new = 0;
struct related_thread_group *grp;
- read_lock(&related_thread_group_lock);
-
- disable_window_stats();
+ local_irq_save(flags);
- reset_all_task_stats();
+ read_lock(&tasklist_lock);
- local_irq_save(flags);
+ read_lock(&related_thread_group_lock);
+ /* Taking all runqueue locks prevents race with sched_exit(). */
for_each_possible_cpu(cpu)
raw_spin_lock(&cpu_rq(cpu)->lock);
+ sched_disable_window_stats = 1;
+
+ reset_all_task_stats();
+
+ read_unlock(&tasklist_lock);
+
list_for_each_entry(grp, &related_thread_groups, list) {
int j;
@@ -3196,7 +3071,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES;
}
- enable_window_stats();
+ sched_disable_window_stats = 0;
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
@@ -3239,10 +3114,10 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
for_each_possible_cpu(cpu)
raw_spin_unlock(&cpu_rq(cpu)->lock);
- local_irq_restore(flags);
-
read_unlock(&related_thread_group_lock);
+ local_irq_restore(flags);
+
trace_sched_reset_all_window_stats(window_start, window_size,
sched_ktime_clock() - start_ts, reason, old, new);
}
@@ -3824,13 +3699,13 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus)
}
/* Return cluster which can offer required capacity for group */
-static struct sched_cluster *
-best_cluster(struct related_thread_group *grp, u64 total_demand)
+static struct sched_cluster *best_cluster(struct related_thread_group *grp,
+ u64 total_demand, bool group_boost)
{
struct sched_cluster *cluster = NULL;
for_each_sched_cluster(cluster) {
- if (group_will_fit(cluster, grp, total_demand))
+ if (group_will_fit(cluster, grp, total_demand, group_boost))
return cluster;
}
@@ -3841,6 +3716,9 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
{
struct task_struct *p;
u64 combined_demand = 0;
+ bool boost_on_big = sched_boost_policy() == SCHED_BOOST_ON_BIG;
+ bool group_boost = false;
+ u64 wallclock;
if (!sysctl_sched_enable_colocation) {
grp->last_update = sched_ktime_clock();
@@ -3848,31 +3726,43 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
return;
}
+ if (list_empty(&grp->tasks))
+ return;
+
+ wallclock = sched_ktime_clock();
+
/*
* wakeup of two or more related tasks could race with each other and
* could result in multiple calls to _set_preferred_cluster being issued
* at same time. Avoid overhead in such cases of rechecking preferred
* cluster
*/
- if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10)
+ if (wallclock - grp->last_update < sched_ravg_window / 10)
return;
- list_for_each_entry(p, &grp->tasks, grp_list)
+ list_for_each_entry(p, &grp->tasks, grp_list) {
+ if (boost_on_big && task_sched_boost(p)) {
+ group_boost = true;
+ break;
+ }
+
+ if (p->ravg.mark_start < wallclock -
+ (sched_ravg_window * sched_ravg_hist_size))
+ continue;
+
combined_demand += p->ravg.demand;
- grp->preferred_cluster = best_cluster(grp, combined_demand);
+ }
+
+ grp->preferred_cluster = best_cluster(grp,
+ combined_demand, group_boost);
grp->last_update = sched_ktime_clock();
trace_sched_set_preferred_cluster(grp, combined_demand);
}
void set_preferred_cluster(struct related_thread_group *grp)
{
- /*
- * Prevent possible deadlock with update_children(). Not updating
- * the preferred cluster once is not a big deal.
- */
- if (!raw_spin_trylock(&grp->lock))
- return;
+ raw_spin_lock(&grp->lock);
_set_preferred_cluster(grp);
raw_spin_unlock(&grp->lock);
}
@@ -3880,6 +3770,8 @@ void set_preferred_cluster(struct related_thread_group *grp)
#define ADD_TASK 0
#define REM_TASK 1
+#define DEFAULT_CGROUP_COLOC_ID 1
+
static inline void free_group_cputime(struct related_thread_group *grp)
{
free_percpu(grp->cpu_time);
@@ -4116,64 +4008,19 @@ static void free_related_thread_group(struct rcu_head *rcu)
kfree(grp);
}
-/*
- * The thread group for a task can change while we are here. However,
- * add_new_task_to_grp() will take care of any tasks that we miss here.
- * When a parent exits, and a child thread is simultaneously exiting,
- * sched_set_group_id() will synchronize those operations.
- */
-static void update_children(struct task_struct *leader,
- struct related_thread_group *grp, int event)
-{
- struct task_struct *child;
- struct rq *rq;
- unsigned long flags;
-
- if (!thread_group_leader(leader))
- return;
-
- if (event == ADD_TASK && !sysctl_sched_enable_thread_grouping)
- return;
-
- if (thread_group_empty(leader))
- return;
-
- child = next_thread(leader);
-
- do {
- rq = task_rq_lock(child, &flags);
-
- if (event == REM_TASK && child->grp && grp == child->grp) {
- transfer_busy_time(rq, grp, child, event);
- list_del_init(&child->grp_list);
- rcu_assign_pointer(child->grp, NULL);
- } else if (event == ADD_TASK && !child->grp) {
- transfer_busy_time(rq, grp, child, event);
- list_add(&child->grp_list, &grp->tasks);
- rcu_assign_pointer(child->grp, grp);
- }
-
- task_rq_unlock(rq, child, &flags);
- } while_each_thread(leader, child);
-
-}
-
static void remove_task_from_group(struct task_struct *p)
{
struct related_thread_group *grp = p->grp;
struct rq *rq;
int empty_group = 1;
- unsigned long flags;
raw_spin_lock(&grp->lock);
- rq = task_rq_lock(p, &flags);
+ rq = __task_rq_lock(p);
transfer_busy_time(rq, p->grp, p, REM_TASK);
list_del_init(&p->grp_list);
rcu_assign_pointer(p->grp, NULL);
- task_rq_unlock(rq, p, &flags);
-
- update_children(p, grp, REM_TASK);
+ __task_rq_unlock(rq);
if (!list_empty(&grp->tasks)) {
empty_group = 0;
@@ -4182,7 +4029,8 @@ static void remove_task_from_group(struct task_struct *p)
raw_spin_unlock(&grp->lock);
- if (empty_group) {
+ /* Reserved groups cannot be destroyed */
+ if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) {
list_del(&grp->list);
call_rcu(&grp->rcu, free_related_thread_group);
}
@@ -4192,7 +4040,6 @@ static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
struct rq *rq;
- unsigned long flags;
raw_spin_lock(&grp->lock);
@@ -4200,13 +4047,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
* Change p->grp under rq->lock. Will prevent races with read-side
* reference of p->grp in various hot-paths
*/
- rq = task_rq_lock(p, &flags);
+ rq = __task_rq_lock(p);
transfer_busy_time(rq, grp, p, ADD_TASK);
list_add(&p->grp_list, &grp->tasks);
rcu_assign_pointer(p->grp, grp);
- task_rq_unlock(rq, p, &flags);
-
- update_children(p, grp, ADD_TASK);
+ __task_rq_unlock(rq);
_set_preferred_cluster(grp);
@@ -4219,23 +4064,33 @@ void add_new_task_to_grp(struct task_struct *new)
{
unsigned long flags;
struct related_thread_group *grp;
- struct task_struct *parent;
+ struct task_struct *leader = new->group_leader;
+ unsigned int leader_grp_id = sched_get_group_id(leader);
- if (!sysctl_sched_enable_thread_grouping)
+ if (!sysctl_sched_enable_thread_grouping &&
+ leader_grp_id != DEFAULT_CGROUP_COLOC_ID)
return;
if (thread_group_leader(new))
return;
- parent = new->group_leader;
+ if (leader_grp_id == DEFAULT_CGROUP_COLOC_ID) {
+ if (!same_schedtune(new, leader))
+ return;
+ }
write_lock_irqsave(&related_thread_group_lock, flags);
rcu_read_lock();
- grp = task_related_thread_group(parent);
+ grp = task_related_thread_group(leader);
rcu_read_unlock();
- /* Its possible that update_children() already added us to the group */
+ /*
+ * It's possible that someone already added the new task to the
+ * group. A leader's thread group is updated prior to calling
+ * this function. It's also possible that the leader has exited
+ * the group. In either case, there is nothing else to do.
+ */
if (!grp || new->grp) {
write_unlock_irqrestore(&related_thread_group_lock, flags);
return;
@@ -4250,14 +4105,55 @@ void add_new_task_to_grp(struct task_struct *new)
write_unlock_irqrestore(&related_thread_group_lock, flags);
}
+#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
+/*
+ * We create a default colocation group at boot. There is no need to
+ * synchronize tasks between cgroups at creation time because the
+ * correct cgroup hierarchy is not available at boot. Therefore cgroup
+ * colocation is turned off by default even though the colocation group
+ * itself has been allocated. Furthermore, this colocation group cannot
+ * be destroyed once it has been created. All of this is done as a
+ * runtime optimization.
+ *
+ * The job of synchronizing tasks to the colocation group is done when
+ * the colocation flag in the cgroup is turned on.
+ */
+static int __init create_default_coloc_group(void)
+{
+ struct related_thread_group *grp = NULL;
+ unsigned long flags;
+
+ grp = alloc_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
+ if (IS_ERR(grp)) {
+ WARN_ON(1);
+ return -ENOMEM;
+ }
+
+ write_lock_irqsave(&related_thread_group_lock, flags);
+ list_add(&grp->list, &related_thread_groups);
+ write_unlock_irqrestore(&related_thread_group_lock, flags);
+
+ update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH);
+ return 0;
+}
+late_initcall(create_default_coloc_group);
+
+int sync_cgroup_colocation(struct task_struct *p, bool insert)
+{
+ unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
+
+ return sched_set_group_id(p, grp_id);
+}
+#endif
+
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
int rc = 0;
unsigned long flags;
struct related_thread_group *grp = NULL;
- /* Prevents tasks from exiting while we are managing groups. */
- write_lock_irqsave(&related_thread_group_lock, flags);
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ write_lock(&related_thread_group_lock);
/* Switching from one group to another directly is not permitted */
if ((current != p && p->flags & PF_EXITING) ||
@@ -4272,6 +4168,12 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
grp = lookup_related_thread_group(group_id);
if (!grp) {
+ /* This is a reserved id */
+ if (group_id == DEFAULT_CGROUP_COLOC_ID) {
+ rc = -EINVAL;
+ goto done;
+ }
+
grp = alloc_related_thread_group(group_id);
if (IS_ERR(grp)) {
rc = -ENOMEM;
@@ -4281,10 +4183,10 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
list_add(&grp->list, &related_thread_groups);
}
- BUG_ON(!grp);
rc = add_task_to_group(p, grp);
done:
- write_unlock_irqrestore(&related_thread_group_lock, flags);
+ write_unlock(&related_thread_group_lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
return rc;
}
@@ -4529,7 +4431,7 @@ bool early_detection_notify(struct rq *rq, u64 wallclock)
struct task_struct *p;
int loop_max = 10;
- if (!sched_boost() || !rq->cfs.h_nr_running)
+ if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running)
return 0;
rq->ed_task = NULL;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ba4403e910d8..12a04f30ef77 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1677,8 +1677,13 @@ static int find_lowest_rq_hmp(struct task_struct *task)
int prev_cpu = task_cpu(task);
u64 cpu_load, min_load = ULLONG_MAX;
int i;
- int restrict_cluster = sched_boost() ? 0 :
- sysctl_sched_restrict_cluster_spill;
+ int restrict_cluster;
+ int boost_on_big;
+
+ boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
+ sched_boost_policy() == SCHED_BOOST_ON_BIG;
+
+ restrict_cluster = sysctl_sched_restrict_cluster_spill;
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
@@ -1697,6 +1702,9 @@ static int find_lowest_rq_hmp(struct task_struct *task)
*/
for_each_sched_cluster(cluster) {
+ if (boost_on_big && cluster->capacity != max_possible_capacity)
+ continue;
+
cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
cpumask_andnot(&candidate_mask, &candidate_mask,
cpu_isolated_mask);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4289bf6cd642..30838bb9b442 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1061,8 +1061,6 @@ extern unsigned int max_load_scale_factor;
extern unsigned int max_possible_capacity;
extern unsigned int min_max_possible_capacity;
extern unsigned int max_power_cost;
-extern unsigned int sched_upmigrate;
-extern unsigned int sched_downmigrate;
extern unsigned int sched_init_task_load_windows;
extern unsigned int up_down_migrate_scale_factor;
extern unsigned int sysctl_sched_restrict_cluster_spill;
@@ -1106,18 +1104,23 @@ extern void sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock);
extern unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
-extern void update_up_down_migrate(void);
extern int update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load);
extern void set_preferred_cluster(struct related_thread_group *grp);
extern void add_new_task_to_grp(struct task_struct *new);
+extern unsigned int update_freq_aggregate_threshold(unsigned int threshold);
-enum sched_boost_type {
+enum sched_boost_policy {
SCHED_BOOST_NONE,
SCHED_BOOST_ON_BIG,
SCHED_BOOST_ON_ALL,
};
+#define NO_BOOST 0
+#define FULL_THROTTLE_BOOST 1
+#define CONSERVATIVE_BOOST 2
+#define RESTRAINED_BOOST 3
+
static inline struct sched_cluster *cpu_cluster(int cpu)
{
return cpu_rq(cpu)->cluster;
@@ -1387,14 +1390,11 @@ extern void set_hmp_defaults(void);
extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
extern unsigned int power_cost(int cpu, u64 demand);
extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
-extern void boost_kick(int cpu);
extern int sched_boost(void);
extern int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
- enum sched_boost_type boost_type);
-extern enum sched_boost_type sched_boost_type(void);
+ enum sched_boost_policy boost_policy);
+extern enum sched_boost_policy sched_boost_policy(void);
extern int task_will_fit(struct task_struct *p, int cpu);
-extern int group_will_fit(struct sched_cluster *cluster,
- struct related_thread_group *grp, u64 demand);
extern u64 cpu_load(int cpu);
extern u64 cpu_load_sync(int cpu, int sync);
extern int preferred_cluster(struct sched_cluster *cluster,
@@ -1422,10 +1422,32 @@ extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft);
extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css,
struct cftype *cft, u64 upmigrate_discourage);
-extern void sched_hmp_parse_dt(void);
-extern void init_sched_hmp_boost_policy(void);
+extern void sched_boost_parse_dt(void);
extern void clear_top_tasks_bitmap(unsigned long *bitmap);
+#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
+extern bool task_sched_boost(struct task_struct *p);
+extern int sync_cgroup_colocation(struct task_struct *p, bool insert);
+extern bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2);
+extern void update_cgroup_boost_settings(void);
+extern void restore_cgroup_boost_settings(void);
+
+#else
+static inline bool
+same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
+{
+ return true;
+}
+
+static inline bool task_sched_boost(struct task_struct *p)
+{
+ return true;
+}
+
+static inline void update_cgroup_boost_settings(void) { }
+static inline void restore_cgroup_boost_settings(void) { }
+#endif
+
#else /* CONFIG_SCHED_HMP */
struct hmp_sched_stats;
@@ -1615,8 +1637,7 @@ static inline void post_big_task_count_change(void) { }
static inline void set_hmp_defaults(void) { }
static inline void clear_reserved(int cpu) { }
-static inline void sched_hmp_parse_dt(void) {}
-static inline void init_sched_hmp_boost_policy(void) {}
+static inline void sched_boost_parse_dt(void) {}
#define trace_sched_cpu_load(...)
#define trace_sched_cpu_load_lb(...)
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 4f8182302e5e..ee2af8e0b5ce 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -25,6 +25,33 @@ struct schedtune {
/* Boost value for tasks on that SchedTune CGroup */
int boost;
+#ifdef CONFIG_SCHED_HMP
+ /* Toggle whether the kernel may override sched_boost_enabled */
+ bool sched_boost_no_override;
+
+ /*
+ * Controls whether a cgroup is eligible for sched boost or not. This
+ * can temporarily be disabled by the kernel based on the no_override
+ * flag above.
+ */
+ bool sched_boost_enabled;
+
+ /*
+ * This tracks the default value of sched_boost_enabled and is used
+ * to restore the value following any temporary changes to that flag.
+ */
+ bool sched_boost_enabled_backup;
+
+ /*
+ * Controls whether tasks of this cgroup should be colocated with each
+ * other and tasks of other cgroups that have the same flag turned on.
+ */
+ bool colocate;
+
+ /* Controls whether further updates are allowed to the colocate flag */
+ bool colocate_update_disabled;
+#endif
+
};
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -54,6 +81,13 @@ static inline struct schedtune *parent_st(struct schedtune *st)
static struct schedtune
root_schedtune = {
.boost = 0,
+#ifdef CONFIG_SCHED_HMP
+ .sched_boost_no_override = false,
+ .sched_boost_enabled = true,
+ .sched_boost_enabled_backup = true,
+ .colocate = false,
+ .colocate_update_disabled = false,
+#endif
};
/*
@@ -97,6 +131,121 @@ struct boost_groups {
/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
+#ifdef CONFIG_SCHED_HMP
+static inline void init_sched_boost(struct schedtune *st)
+{
+ st->sched_boost_no_override = false;
+ st->sched_boost_enabled = true;
+ st->sched_boost_enabled_backup = st->sched_boost_enabled;
+ st->colocate = false;
+ st->colocate_update_disabled = false;
+}
+
+bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
+{
+ return task_schedtune(tsk1) == task_schedtune(tsk2);
+}
+
+void update_cgroup_boost_settings(void)
+{
+ int i;
+
+ for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
+ if (!allocated_group[i])
+ break;
+
+ if (allocated_group[i]->sched_boost_no_override)
+ continue;
+
+ allocated_group[i]->sched_boost_enabled = false;
+ }
+}
+
+void restore_cgroup_boost_settings(void)
+{
+ int i;
+
+ for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
+ if (!allocated_group[i])
+ break;
+
+ allocated_group[i]->sched_boost_enabled =
+ allocated_group[i]->sched_boost_enabled_backup;
+ }
+}
+
+bool task_sched_boost(struct task_struct *p)
+{
+ struct schedtune *st = task_schedtune(p);
+
+ return st->sched_boost_enabled;
+}
+
+static u64
+sched_boost_override_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->sched_boost_no_override;
+}
+
+static int sched_boost_override_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 override)
+{
+ struct schedtune *st = css_st(css);
+
+ st->sched_boost_no_override = !!override;
+
+ return 0;
+}
+
+static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->sched_boost_enabled;
+}
+
+static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 enable)
+{
+ struct schedtune *st = css_st(css);
+
+ st->sched_boost_enabled = !!enable;
+ st->sched_boost_enabled_backup = st->sched_boost_enabled;
+
+ return 0;
+}
+
+static u64 sched_colocate_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->colocate;
+}
+
+static int sched_colocate_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 colocate)
+{
+ struct schedtune *st = css_st(css);
+
+ if (st->colocate_update_disabled)
+ return -EPERM;
+
+ st->colocate = !!colocate;
+ st->colocate_update_disabled = true;
+ return 0;
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static inline void init_sched_boost(struct schedtune *st) { }
+
+#endif /* CONFIG_SCHED_HMP */
+
static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
@@ -121,12 +270,45 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
return 0;
}
+static void schedtune_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+ struct schedtune *st;
+ bool colocate;
+
+ cgroup_taskset_first(tset, &css);
+ st = css_st(css);
+
+ colocate = st->colocate;
+
+ cgroup_taskset_for_each(task, css, tset)
+ sync_cgroup_colocation(task, colocate);
+}
+
static struct cftype files[] = {
{
.name = "boost",
.read_u64 = boost_read,
.write_u64 = boost_write,
},
+#ifdef CONFIG_SCHED_HMP
+ {
+ .name = "sched_boost_no_override",
+ .read_u64 = sched_boost_override_read,
+ .write_u64 = sched_boost_override_write,
+ },
+ {
+ .name = "sched_boost_enabled",
+ .read_u64 = sched_boost_enabled_read,
+ .write_u64 = sched_boost_enabled_write,
+ },
+ {
+ .name = "colocate",
+ .read_u64 = sched_colocate_read,
+ .write_u64 = sched_colocate_write,
+ },
+#endif
{ } /* terminate */
};
@@ -189,6 +371,7 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
/* Initialize per CPUs boost group support */
st->idx = idx;
+ init_sched_boost(st);
if (schedtune_boostgroup_init(st))
goto release;
@@ -222,6 +405,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
.legacy_cftypes = files,
.early_init = 1,
.allow_attach = subsys_cgroup_allow_attach,
+ .attach = schedtune_attach,
};
#endif /* CONFIG_CGROUP_SCHEDTUNE */
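
The new schedtune attributes are exercised from userspace through the cgroup filesystem. A sketch follows; the /dev/stune mount point, the top-app group and the schedtune. file prefix are assumptions for illustration, while the attribute names come from the cftype table above:

#include <stdio.h>

/* Write one value to a schedtune cgroup attribute (paths are assumed). */
static int st_write(const char *group, const char *attr, int val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/dev/stune/%s/schedtune.%s", group, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;

	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	/* Keep this group boosted even while CONSERVATIVE_BOOST clears others. */
	st_write("top-app", "sched_boost_no_override", 1);

	/*
	 * Opt the group into the default colocation group; note that
	 * sched_colocate_write() makes this a write-once setting.
	 */
	st_write("top-app", "colocate", 1);

	return 0;
}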
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 574316f1c344..b7cbd7940f7b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -124,6 +124,7 @@ static int __maybe_unused neg_one = -1;
static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
+static int __maybe_unused three = 3;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
static int one_hundred = 100;
@@ -377,6 +378,22 @@ static struct ctl_table kern_table[] = {
.extra2 = &one_hundred,
},
{
+ .procname = "sched_group_upmigrate",
+ .data = &sysctl_sched_group_upmigrate_pct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_hmp_proc_update_handler,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "sched_group_downmigrate",
+ .data = &sysctl_sched_group_downmigrate_pct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_hmp_proc_update_handler,
+ .extra1 = &zero,
+ },
+ {
.procname = "sched_init_task_load",
.data = &sysctl_sched_init_task_load_pct,
.maxlen = sizeof(unsigned int),
@@ -487,6 +504,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_boost_handler,
+ .extra1 = &zero,
+ .extra2 = &three,
},
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SCHED_DEBUG
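
The two new entries above pair with the group up/down-migrate thresholds added in hmp.c. A sketch of tuning them, assuming they surface as /proc/sys/kernel/sched_group_upmigrate and sched_group_downmigrate (percent values; the numbers below are only illustrative). sched_hmp_proc_update_handler() rejects a downmigrate setting above upmigrate, so raise upmigrate first and lower it last:

#include <stdio.h>

static int write_pct(const char *name, unsigned int pct)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;

	fprintf(f, "%u\n", pct);
	return fclose(f);
}

int main(void)
{
	/* Defaults are 100/95; widen the hysteresis band slightly. */
	if (write_pct("sched_group_upmigrate", 110) ||
	    write_pct("sched_group_downmigrate", 90))
		perror("sched_group_*migrate");

	return 0;
}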