-rw-r--r--  Documentation/scheduler/sched-hmp.txt | 29
-rw-r--r--  drivers/base/cpu.c                    | 95
-rw-r--r--  include/linux/sched.h                 |  6
-rw-r--r--  include/linux/sched/sysctl.h          |  2
-rw-r--r--  kernel/sched/core.c                   | 10
-rw-r--r--  kernel/sched/debug.c                  |  3
-rw-r--r--  kernel/sched/fair.c                   | 80
-rw-r--r--  kernel/sched/sched.h                  |  8
-rw-r--r--  kernel/sysctl.c                       | 14
9 files changed, 182 insertions(+), 65 deletions(-)
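In short, this change turns the two mostly-idle thresholds from global sysctl knobs into per-runqueue fields (rq->mostly_idle_load, stored in absolute load units, and rq->mostly_idle_nr_run), exposed per CPU through sysfs. The sketch below is a rough standalone illustration (not kernel code) of the new per-CPU check and of the percent conversion the sysfs handlers rely on; MAX_TASK_LOAD is a placeholder standing in for the kernel's max_task_load().

/* Standalone sketch (not kernel code) of the per-CPU mostly-idle test
 * introduced by this patch, and of the pct_to_real()/real_to_pct()
 * conversion used by the sysfs handlers. MAX_TASK_LOAD is a placeholder.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_TASK_LOAD	10000000ULL	/* placeholder for max_task_load() */

struct rq_sketch {
	uint64_t cpu_load;		/* stands in for cpu_load(cpu) */
	unsigned int nr_running;
	uint64_t mostly_idle_load;	/* absolute units, not percent */
	unsigned int mostly_idle_nr_run;
};

static uint64_t pct_to_real(uint64_t pct)
{
	return pct * MAX_TASK_LOAD / 100;
}

static uint64_t real_to_pct(uint64_t load)
{
	return load * 100 / MAX_TASK_LOAD;
}

/* Mirrors the reworked mostly_idle_cpu(): both thresholds now come from
 * the runqueue instead of global tunables.
 */
static int mostly_idle_cpu(const struct rq_sketch *rq)
{
	return rq->cpu_load <= rq->mostly_idle_load &&
	       rq->nr_running <= rq->mostly_idle_nr_run;
}

int main(void)
{
	/* Defaults set in sched_init(): 20% load threshold, 3 tasks. */
	struct rq_sketch rq = {
		.cpu_load = pct_to_real(15),
		.nr_running = 2,
		.mostly_idle_load = pct_to_real(20),
		.mostly_idle_nr_run = 3,
	};

	printf("threshold = %llu units (%llu%%), mostly idle: %d\n",
	       (unsigned long long)rq.mostly_idle_load,
	       (unsigned long long)real_to_pct(rq.mostly_idle_load),
	       mostly_idle_cpu(&rq));
	return 0;
}

With that picture in mind, the patch itself: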
diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt
index 63ef1f558ce0..d3321cb208e9 100644
--- a/Documentation/scheduler/sched-hmp.txt
+++ b/Documentation/scheduler/sched-hmp.txt
@@ -564,23 +564,22 @@ both tasks and CPUs to aid in the placement of tasks.
 
   The "mostly_idle" classification applies to CPUs. This
   classification attempts to answer the following question: if a task
-  is put on this CPU, is it likely to be able to run soon? One
-  possible way to answer this question would be to just check whether
-  the CPU is idle or not. That may be too conservative however. The
-  CPU may be currently executing a very small task and could become
-  idle soon. Since the scheduler is tracking the demand of each task
-  it can make an educated guess as to whether a CPU will become idle
-  in the near future.
+  is put on this CPU, is it likely to be able to run with low contention for
+  bandwidth? One possible way to answer this question would be to just check
+  whether the CPU is idle or not. That may be too conservative however. The CPU
+  may be currently executing a very small task and could become idle soon. Since
+  the scheduler is tracking the demand of each task it can make an educated
+  guess as to whether a CPU will become idle in the near future.
 
   There are two tunable parameters which are used to determine whether
   a CPU is mostly idle:
 
-	/proc/sys/kernel/sched_mostly_idle_nr_run
-	/proc/sys/kernel/sched_mostly_idle_load
+	/sys/devices/system/cpu/cpuX/sched_mostly_idle_nr_run
+	/sys/devices/system/cpu/cpuX/sched_mostly_idle_load
 
-  If a CPU does not have more than sched_mostly_idle_nr_run runnable
-  tasks and is not more than sched_mostly_idle_load percent busy, it
-  is considered mostly idle.
+  Note that these tunables are per-cpu. If a CPU does not have more than
+  sched_mostly_idle_nr_run runnable tasks and is not more than
+  sched_mostly_idle_load percent busy, it is considered mostly idle.
 
 - spill threshold
 
@@ -1042,9 +1041,9 @@ IRQ_UPDATE
 
 *** 7.1 sched_mostly_idle_nr_run
 
-Appears at: /proc/sys/kernel/sched_mostly_idle_nr_run
+Appears at: /sys/devices/system/cpu/cpuX/sched_mostly_idle_nr_run
 
-Default value: 4
+Default value: 3
 
 If a CPU has this many runnable tasks (or less), it is considered
 "mostly idle." A mostly idle CPU is a preferred destination for a
@@ -1054,7 +1053,7 @@ than sched_mostly_idle_load percent busy.
 
 *** 7.2 sched_mostly_idle_load
 
-Appears at: /proc/sys/kernel/sched_mostly_idle_load
+Appears at: /sys/devices/system/cpu/cpuX/sched_mostly_idle_load
 
 Default value: 20
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 91bbb1959d8d..763fd00c697b 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -180,10 +180,102 @@ static struct attribute_group crash_note_cpu_attr_group = {
 };
 #endif
 
+#ifdef CONFIG_SCHED_HMP
+static ssize_t show_sched_mostly_idle_load(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+	int mostly_idle_pct;
+
+	cpunum = cpu->dev.id;
+
+	mostly_idle_pct = sched_get_cpu_mostly_idle_load(cpunum);
+
+	rc = snprintf(buf, PAGE_SIZE-2, "%d\n", mostly_idle_pct);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_mostly_idle_load(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = cpu->dev.id;
+	int mostly_idle_load, err;
+
+	err = kstrtoint(strstrip((char *)buf), 0, &mostly_idle_load);
+	if (err)
+		return err;
+
+	err = sched_set_cpu_mostly_idle_load(cpuid, mostly_idle_load);
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
+static ssize_t show_sched_mostly_idle_nr_run(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+	int mostly_idle_nr_run;
+
+	cpunum = cpu->dev.id;
+
+	mostly_idle_nr_run = sched_get_cpu_mostly_idle_nr_run(cpunum);
+
+	rc = snprintf(buf, PAGE_SIZE-2, "%d\n", mostly_idle_nr_run);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_mostly_idle_nr_run(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = cpu->dev.id;
+	int mostly_idle_nr_run, err;
+
+	err = kstrtoint(strstrip((char *)buf), 0, &mostly_idle_nr_run);
+	if (err)
+		return err;
+
+	err = sched_set_cpu_mostly_idle_nr_run(cpuid, mostly_idle_nr_run);
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
+static DEVICE_ATTR(sched_mostly_idle_load, 0664, show_sched_mostly_idle_load,
+		store_sched_mostly_idle_load);
+static DEVICE_ATTR(sched_mostly_idle_nr_run, 0664,
+		show_sched_mostly_idle_nr_run, store_sched_mostly_idle_nr_run);
+
+static struct attribute *hmp_sched_cpu_attrs[] = {
+	&dev_attr_sched_mostly_idle_load.attr,
+	&dev_attr_sched_mostly_idle_nr_run.attr,
+	NULL
+};
+
+static struct attribute_group sched_hmp_cpu_attr_group = {
+	.attrs = hmp_sched_cpu_attrs,
+};
+
+#endif /* CONFIG_SCHED_HMP */
 static const struct attribute_group *common_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_SCHED_HMP
+	&sched_hmp_cpu_attr_group,
+#endif
 	NULL
 };
 
@@ -191,6 +283,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_SCHED_HMP
+	&sched_hmp_cpu_attr_group,
+#endif
 	NULL
 };
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 740696b0a57d..701611dad0fd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2327,9 +2327,15 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency)
 #endif
 
 #ifdef CONFIG_SCHED_HMP
+
 extern int sched_set_boost(int enable);
 extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct);
 extern u32 sched_get_init_task_load(struct task_struct *p);
+extern int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct);
+extern int sched_get_cpu_mostly_idle_load(int cpu);
+extern int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run);
+extern int sched_get_cpu_mostly_idle_nr_run(int cpu);
+
 #else
 static inline int sched_set_boost(int enable)
 {
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 8cadba92aee0..bd2abda891cd 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -59,9 +59,7 @@ extern int sysctl_sched_freq_dec_notify;
 #ifdef CONFIG_SCHED_HMP
 
 extern unsigned int sysctl_sched_spill_nr_run;
-extern unsigned int sysctl_sched_mostly_idle_nr_run;
 extern unsigned int sysctl_sched_spill_load_pct;
-extern unsigned int sysctl_sched_mostly_idle_load_pct;
 extern unsigned int sysctl_sched_small_task_pct;
 extern unsigned int sysctl_sched_upmigrate_pct;
 extern unsigned int sysctl_sched_downmigrate_pct;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7ec7b5442f41..07aac49174dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2045,6 +2045,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	u64 start_ts = sched_clock();
 	int reason = WINDOW_CHANGE;
 	unsigned int old = 0, new = 0;
+	unsigned int old_window_size = sched_ravg_window;
 
 	disable_window_stats();
 
@@ -2067,8 +2068,13 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 
-		if (window_start)
+		if (window_start) {
+			u32 mostly_idle_load = rq->mostly_idle_load;
+
 			rq->window_start = window_start;
+			rq->mostly_idle_load = div64_u64((u64)mostly_idle_load *
+				(u64)sched_ravg_window, (u64)old_window_size);
+		}
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 #endif
@@ -9126,6 +9132,8 @@ void __init sched_init(void)
 		rq->window_start = 0;
 		rq->nr_small_tasks = rq->nr_big_tasks = 0;
 		rq->hmp_flags = 0;
+		rq->mostly_idle_load = pct_to_real(20);
+		rq->mostly_idle_nr_run = 3;
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 		rq->old_busy_time = 0;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f9bb03279152..752a0de12871 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -310,6 +310,8 @@ do { \
 	P(cpu_capacity);
 #endif
 #ifdef CONFIG_SCHED_HMP
+	P(mostly_idle_load);
+	P(mostly_idle_nr_run);
 	P(load_scale_factor);
 	P(capacity);
 	P(max_possible_capacity);
@@ -402,7 +404,6 @@ static void sched_debug_header(struct seq_file *m)
 	P(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #ifdef CONFIG_SCHED_HMP
-	P(sched_mostly_idle_load);
 	P(sched_small_task);
 	P(sched_upmigrate);
 	P(sched_downmigrate);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 833225a9fe57..dcc3f57668e1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2570,14 +2570,6 @@ unsigned int __read_mostly sched_enable_hmp = 0;
 unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 
 /*
- * A cpu is considered practically idle, if:
- *
- *	rq->nr_running <= sysctl_sched_mostly_idle_nr_run &&
- *	rq->cumulative_runnable_avg <= sched_mostly_idle_load
- */
-unsigned int __read_mostly sysctl_sched_mostly_idle_nr_run = 3;
-
-/*
  * Control whether or not individual CPU power consumption is used to
  * guide task placement.
  */
@@ -2591,16 +2583,6 @@ unsigned int __read_mostly sched_enable_power_aware = 0;
 unsigned int __read_mostly sysctl_sched_powerband_limit_pct = 20;
 
 /*
- * Conversion of *_pct to absolute form is based on max_task_load().
- *
- * For example:
- *	sched_mostly_idle_load =
- *	(sysctl_sched_mostly_idle_load_pct * max_task_load()) / 100;
- */
-unsigned int __read_mostly sched_mostly_idle_load;
-unsigned int __read_mostly sysctl_sched_mostly_idle_load_pct = 20;
-
-/*
  * CPUs with load greater than the sched_spill_load_threshold are not
  * eligible for task placement. When all CPUs in a cluster achieve a
  * load higher than this level, tasks becomes eligible for inter
@@ -2666,17 +2648,11 @@ static inline int available_cpu_capacity(int cpu)
 	return rq->capacity;
 }
 
-#define pct_to_real(tunable)	\
-	(div64_u64((u64)tunable * (u64)max_task_load(), 100))
-
 void set_hmp_defaults(void)
 {
 	sched_spill_load = pct_to_real(sysctl_sched_spill_load_pct);
 
-	sched_mostly_idle_load =
-		pct_to_real(sysctl_sched_mostly_idle_load_pct);
-
 	sched_small_task = pct_to_real(sysctl_sched_small_task_pct);
 
@@ -2715,6 +2691,44 @@ int sched_set_init_task_load(struct task_struct *p, int init_load_pct)
 	return 0;
 }
 
+int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (mostly_idle_pct < 0 || mostly_idle_pct > 100)
+		return -EINVAL;
+
+	rq->mostly_idle_load = pct_to_real(mostly_idle_pct);
+
+	return 0;
+}
+
+int sched_get_cpu_mostly_idle_load(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	int mostly_idle_pct;
+
+	mostly_idle_pct = real_to_pct(rq->mostly_idle_load);
+
+	return mostly_idle_pct;
+}
+
+int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->mostly_idle_nr_run = nr_run;
+
+	return 0;
+}
+
+int sched_get_cpu_mostly_idle_nr_run(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	return rq->mostly_idle_nr_run;
+}
+
 /*
  * 'load' is in reference to "best cpu" at its best frequency.
  * Scale that in reference to a given cpu, accounting for how bad it is
@@ -2776,9 +2790,12 @@ spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu)
 int mostly_idle_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	int mostly_idle;
+
+	mostly_idle = (cpu_load(cpu) <= rq->mostly_idle_load
+		&& rq->nr_running <= rq->mostly_idle_nr_run);
 
-	return (cpu_load(cpu) <= sched_mostly_idle_load
-		&& rq->nr_running <= sysctl_sched_mostly_idle_nr_run);
+	return mostly_idle;
 }
 
 static int boost_refcount;
@@ -3344,10 +3361,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
 		return ret;
 
 	if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) ||
-		(sysctl_sched_mostly_idle_load_pct >
-			sysctl_sched_spill_load_pct) || *data > 100) {
-			*data = old_val;
-			return -EINVAL;
+				*data > 100) {
+		*data = old_val;
+		return -EINVAL;
 	}
 
 	/*
@@ -9254,8 +9270,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
 	int i;
 
 	if (rq->nr_running >= 2 && (rq->nr_running - rq->nr_small_tasks >= 2 ||
-		rq->nr_running > sysctl_sched_mostly_idle_nr_run ||
-		cpu_load(cpu) > sched_mostly_idle_load)) {
+		rq->nr_running > rq->mostly_idle_nr_run ||
+		cpu_load(cpu) > rq->mostly_idle_load)) {
 
 		if (rq->capacity == max_capacity)
 			return 1;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a4c7dde3c892..d0193932354f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -658,6 +658,8 @@ struct rq {
 	int capacity;
 	int max_possible_capacity;
 	u64 window_start;
+	u32 mostly_idle_load;
+	int mostly_idle_nr_run;
 
 #ifdef CONFIG_SCHED_FREQ_INPUT
 	unsigned int old_busy_time;
@@ -1013,6 +1015,12 @@ dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
 	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
 }
 
+#define pct_to_real(tunable)	\
+	(div64_u64((u64)tunable * (u64)max_task_load(), 100))
+
+#define real_to_pct(tunable)	\
+	(div64_u64((u64)tunable * (u64)100, (u64)max_task_load()))
+
 #else	/* CONFIG_SCHED_HMP */
 
 static inline int pct_task_load(struct task_struct *p) { return 0; }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e7b03e816bd7..2281f24194db 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -366,20 +366,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= sched_hmp_proc_update_handler,
 	},
 	{
-		.procname	= "sched_mostly_idle_load",
-		.data		= &sysctl_sched_mostly_idle_load_pct,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_hmp_proc_update_handler,
-	},
-	{
-		.procname	= "sched_mostly_idle_nr_run",
-		.data		= &sysctl_sched_mostly_idle_nr_run,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
 		.procname	= "sched_spill_load",
 		.data		= &sysctl_sched_spill_load_pct,
 		.maxlen		= sizeof(unsigned int),
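After this patch the thresholds are no longer reachable under /proc/sys/kernel; they are read and written per CPU through the new sysfs attributes. Below is a minimal userspace sketch of doing that, assuming a kernel built with CONFIG_SCHED_HMP and this change applied; error handling is trimmed for brevity and the program is only an illustration, not part of the patch.

/* Minimal userspace sketch (not part of the patch): read and adjust the
 * per-CPU mostly-idle tunables added by this change. Paths follow the
 * documentation update above.
 */
#include <stdio.h>

static int read_tunable(int cpu, const char *name)
{
	char path[128];
	int val = -1;
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/%s", cpu, name);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

static void write_tunable(int cpu, const char *name, int val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/%s", cpu, name);
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "%d\n", val);
		fclose(f);
	}
}

int main(void)
{
	/* Defaults after this patch should read back as 20 and 3. */
	int load_pct = read_tunable(0, "sched_mostly_idle_load");
	int nr_run = read_tunable(0, "sched_mostly_idle_nr_run");

	printf("cpu0: mostly_idle_load=%d%% mostly_idle_nr_run=%d\n",
	       load_pct, nr_run);

	/* The load threshold is written as a percentage; values outside
	 * 0..100 are rejected with -EINVAL by sched_set_cpu_mostly_idle_load().
	 */
	write_tunable(0, "sched_mostly_idle_load", 30);
	write_tunable(0, "sched_mostly_idle_nr_run", nr_run + 1);

	return 0;
}

Each write only affects the targeted runqueue, which is the point of the change: every CPU can now carry its own packing thresholds instead of sharing a single global pair.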
