Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c      |  21
-rw-r--r--   kernel/fork.c             |   3
-rw-r--r--   kernel/panic.c            |   2
-rw-r--r--   kernel/resource.c         |  13
-rw-r--r--   kernel/sched/core.c       |  14
-rw-r--r--   kernel/sched/cpupri.c     |  11
-rw-r--r--   kernel/sched/fair.c       | 408
-rw-r--r--   kernel/sched/walt.h       |   2
-rw-r--r--   kernel/softirq.c          |   5
-rw-r--r--   kernel/time/alarmtimer.c  |   3
-rw-r--r--   kernel/trace/ftrace.c     |   2
-rw-r--r--   kernel/trace/trace.c      | 101
12 files changed, 446 insertions, 139 deletions
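The largest hunk below is the EAS wakeup rework in kernel/sched/fair.c. As a reading aid, here is a small stand-alone C sketch of the idle-state estimate that the new code in group_idle_state() performs; the helper name and the sample numbers are illustrative only, just the arithmetic follows the hunk: map the group's post-migration utilization linearly onto the available idle states, excluding the deepest one, fall back to the deepest state when no utilization remains, and assume no idle time when the group is fully occupied.

#include <stdio.h>

/*
 * Illustrative stand-alone version of the idle-state estimate added to
 * group_idle_state() in the kernel/sched/fair.c hunk (not kernel code).
 *
 * grp_util:       estimated group utilization after moving the task
 * max_capacity:   per-CPU original capacity of the group (sgc->max_capacity)
 * group_weight:   number of CPUs in the group
 * nr_idle_states: number of idle states known for the group
 */
static int estimate_group_idle_state(long grp_util, long max_capacity,
				     int group_weight, int nr_idle_states)
{
	int max_idle_state_idx = nr_idle_states - 2;
	int new_state;

	/* Group fully (or over-) occupied after the move: assume no idle time. */
	if (grp_util > max_capacity * group_weight)
		return 0;

	/* No utilization left in the group: expect the deepest idle state. */
	if (grp_util <= 0)
		return max_idle_state_idx + 1;

	/*
	 * Partially idle: map utilization linearly onto the idle states,
	 * excluding the deepest one; more utilization means a shallower
	 * expected state.
	 */
	new_state = (int)(grp_util * max_idle_state_idx / max_capacity);
	if (new_state > max_idle_state_idx)
		new_state = max_idle_state_idx;
	return max_idle_state_idx - new_state;
}

int main(void)
{
	/* Hypothetical group: 4 CPUs, capacity 1024 each, 3 idle states. */
	printf("%d\n", estimate_group_idle_state(0, 1024, 4, 3));    /* 2 (deepest) */
	printf("%d\n", estimate_group_idle_state(800, 1024, 4, 3));  /* 1           */
	printf("%d\n", estimate_group_idle_state(5000, 1024, 4, 3)); /* 0 (busy)    */
	return 0;
}

In the actual kernel function this estimate only refines the shallowest idle state already observed across the group's CPUs; the sketch covers just the estimation step.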
diff --git a/kernel/events/core.c b/kernel/events/core.c index 69f8f683138a..87f5d841f796 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6600,21 +6600,6 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) -{ - /* - * Due to interrupt latency (AKA "skid"), we may enter the - * kernel before taking an overflow, even if the PMU is only - * counting user events. - * To avoid leaking information to userspace, we must always - * reject kernel samples when exclude_kernel is set. - */ - if (event->attr.exclude_kernel && !user_mode(regs)) - return false; - - return true; -} - /* * Generic event overflow handling, sampling. */ @@ -6662,12 +6647,6 @@ static int __perf_event_overflow(struct perf_event *event, } /* - * For security, drop the skid kernel samples if necessary. - */ - if (!sample_is_allowed(event, regs)) - return ret; - - /* * XXX event_limit might not quite work as expected on inherited * events */ diff --git a/kernel/fork.c b/kernel/fork.c index 246b8a57a32d..fef4df444f47 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -832,8 +832,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mm = get_task_mm(task); if (mm && mm != current->mm && - !ptrace_may_access(task, mode) && - !capable(CAP_SYS_RESOURCE)) { + !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } diff --git a/kernel/panic.c b/kernel/panic.c index 679254405510..75f564a94a82 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/nmi.h> #include <linux/console.h> +#include <soc/qcom/minidump.h> #define CREATE_TRACE_POINTS #include <trace/events/exception.h> @@ -108,6 +109,7 @@ void panic(const char *fmt, ...) va_start(args, fmt); vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); + dump_stack_minidump(0); pr_emerg("Kernel panic - not syncing: %s\n", buf); #ifdef CONFIG_DEBUG_BUGVERBOSE /* diff --git a/kernel/resource.c b/kernel/resource.c index 4c9835c09dcd..c09d484f7b5f 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -105,16 +105,25 @@ static int r_show(struct seq_file *m, void *v) { struct resource *root = m->private; struct resource *r = v, *p; + unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; int depth; for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) if (p->parent == root) break; + + if (file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) { + start = r->start; + end = r->end; + } else { + start = end = 0; + } + seq_printf(m, "%*s%0*llx-%0*llx : %s\n", depth * 2, "", - width, (unsigned long long) r->start, - width, (unsigned long long) r->end, + width, start, + width, end, r->name ? 
r->name : "<BAD>"); return 0; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 18f4fb65cd1d..4ecca604e64b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9232,11 +9232,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - sched_online_group(tg, parent); - return &tg->css; } +/* Expose task group only after completing cgroup initialization */ +static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +{ + struct task_group *tg = css_tg(css); + struct task_group *parent = css_tg(css->parent); + + if (parent) + sched_online_group(tg, parent); + return 0; +} + static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -9618,6 +9627,7 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_online = cpu_cgroup_css_online, .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, .fork = cpu_cgroup_fork, diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 1d00cf8c00fa..14225d5d8617 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -279,3 +279,14 @@ void cpupri_cleanup(struct cpupri *cp) for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) free_cpumask_var(cp->pri_to_cpu[i].mask); } + +/* + * cpupri_check_rt - check if CPU has a RT task + * should be called from rcu-sched read section. + */ +bool cpupri_check_rt(void) +{ + int cpu = raw_smp_processor_id(); + + return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL; +} diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 422438d43d90..853064319b0d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -35,6 +35,8 @@ #include "sched.h" #include <trace/events/sched.h> #include "tune.h" +#include "walt.h" + /* * Targeted preemption latency for CPU-bound tasks: * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) @@ -6548,9 +6550,11 @@ static int find_new_capacity(struct energy_env *eenv, return idx; } -static int group_idle_state(struct sched_group *sg) +static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) { int i, state = INT_MAX; + int src_in_grp, dst_in_grp; + long grp_util = 0; /* Find the shallowest idle state in the sched group. */ for_each_cpu(i, sched_group_cpus(sg)) @@ -6559,6 +6563,54 @@ static int group_idle_state(struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. + */ + for_each_cpu(i, sched_group_cpus(sg)) + grp_util += cpu_util(i); + + src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); + dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); + if (src_in_grp == dst_in_grp) { + /* both CPUs under consideration are in the same group or not in + * either group, migration should leave idle state the same. + */ + goto end; + } + /* add or remove util as appropriate to indicate what group util + * will be (worst case - no concurrent execution) after moving the task + */ + grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + + if (grp_util <= + ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { + /* after moving, this group is at most partly + * occupied, so it should have some idle time. 
+ */ + int max_idle_state_idx = sg->sge->nr_idle_states - 2; + int new_state = grp_util * max_idle_state_idx; + if (grp_util <= 0) + /* group will have no util, use lowest state */ + new_state = max_idle_state_idx + 1; + else { + /* for partially idle, linearly map util to idle + * states, excluding the lowest one. This does not + * correspond to the state we expect to enter in + * reality, but an indication of what might happen. + */ + new_state = min(max_idle_state_idx, (int) + (new_state / sg->sgc->max_capacity)); + new_state = max_idle_state_idx - new_state; + } + state = new_state; + } else { + /* After moving, the group will be fully occupied + * so assume it will not be idle at all. + */ + state = 0; + } +end: return state; } @@ -6631,8 +6683,9 @@ static int sched_group_energy(struct energy_env *eenv) } } - idle_idx = group_idle_state(sg); + idle_idx = group_idle_state(eenv, sg); group_util = group_norm_util(eenv, sg); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) >> SCHED_CAPACITY_SHIFT; sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) @@ -7337,48 +7390,59 @@ static int start_cpu(bool boosted) return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; } -static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle) +static inline int find_best_target(struct task_struct *p, int *backup_cpu, + bool boosted, bool prefer_idle) { - int target_cpu = -1; - unsigned long target_util = prefer_idle ? ULONG_MAX : 0; - unsigned long backup_capacity = ULONG_MAX; - int best_idle_cpu = -1; - int best_idle_cstate = INT_MAX; - int backup_cpu = -1; + unsigned long best_idle_min_cap_orig = ULONG_MAX; unsigned long min_util = boosted_task_util(p); + unsigned long target_capacity = ULONG_MAX; + unsigned long min_wake_util = ULONG_MAX; + unsigned long target_max_spare_cap = 0; + unsigned long target_util = ULONG_MAX; + unsigned long best_active_util = ULONG_MAX; + int best_idle_cstate = INT_MAX; struct sched_domain *sd; struct sched_group *sg; - int cpu = start_cpu(boosted); + int best_active_cpu = -1; + int best_idle_cpu = -1; + int target_cpu = -1; + int cpu, i; + + *backup_cpu = -1; schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts); schedstat_inc(this_rq(), eas_stats.fbt_attempts); + /* Find start CPU based on boost value */ + cpu = start_cpu(boosted); if (cpu < 0) { schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu); schedstat_inc(this_rq(), eas_stats.fbt_no_cpu); - return target_cpu; + return -1; } + /* Find SD for the start CPU */ sd = rcu_dereference(per_cpu(sd_ea, cpu)); - if (!sd) { schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd); schedstat_inc(this_rq(), eas_stats.fbt_no_sd); - return target_cpu; + return -1; } + /* Scan CPUs in all SDs */ sg = sd->groups; - do { - int i; - for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { - unsigned long cur_capacity, new_util, wake_util; - unsigned long min_wake_util = ULONG_MAX; + unsigned long capacity_curr = capacity_curr_of(i); + unsigned long capacity_orig = capacity_orig_of(i); + unsigned long wake_util, new_util; if (!cpu_online(i)) continue; + if (walt_cpu_high_irqload(i)) + continue; + /* * p's blocked utilization is still accounted for on prev_cpu * so prev_cpu will receive a negative bias due to the double @@ -7393,65 +7457,204 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre * than the one required to boost the task. 
*/ new_util = max(min_util, new_util); - - if (new_util > capacity_orig_of(i)) + if (new_util > capacity_orig) continue; /* - * Unconditionally favoring tasks that prefer idle cpus to + * Case A) Latency sensitive tasks + * + * Unconditionally favoring tasks that prefer idle CPU to * improve latency. + * + * Looking for: + * - an idle CPU, whatever its idle_state is, since + * the first CPUs we explore are more likely to be + * reserved for latency sensitive tasks. + * - a non idle CPU where the task fits in its current + * capacity and has the maximum spare capacity. + * - a non idle CPU with lower contention from other + * tasks and running at the lowest possible OPP. + * + * The last two goals tries to favor a non idle CPU + * where the task can run as if it is "almost alone". + * A maximum spare capacity CPU is favoured since + * the task already fits into that CPU's capacity + * without waiting for an OPP chance. + * + * The following code path is the only one in the CPUs + * exploration loop which is always used by + * prefer_idle tasks. It exits the loop with wither a + * best_active_cpu or a target_cpu which should + * represent an optimal choice for latency sensitive + * tasks. */ - if (idle_cpu(i) && prefer_idle) { - schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); - schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - return i; - } + if (prefer_idle) { - cur_capacity = capacity_curr_of(i); - - if (new_util < cur_capacity) { - if (cpu_rq(i)->nr_running) { - /* - * Find a target cpu with the lowest/highest - * utilization if prefer_idle/!prefer_idle. - */ - if (prefer_idle) { - /* Favor the CPU that last ran the task */ - if (new_util > target_util || - wake_util > min_wake_util) - continue; - min_wake_util = wake_util; - target_util = new_util; - target_cpu = i; - } else if (target_util < new_util) { - target_util = new_util; - target_cpu = i; - } - } else if (!prefer_idle) { - int idle_idx = idle_get_state_idx(cpu_rq(i)); + /* + * Case A.1: IDLE CPU + * Return the first IDLE CPU we find. + */ + if (idle_cpu(i)) { + schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); + schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - if (best_idle_cpu < 0 || - (sysctl_sched_cstate_aware && - best_idle_cstate > idle_idx)) { - best_idle_cstate = idle_idx; - best_idle_cpu = i; - } + trace_sched_find_best_target(p, + prefer_idle, min_util, + cpu, best_idle_cpu, + best_active_cpu, i); + + return i; } - } else if (backup_capacity > cur_capacity) { - /* Find a backup cpu with least capacity. */ - backup_capacity = cur_capacity; - backup_cpu = i; + + /* + * Case A.2: Target ACTIVE CPU + * Favor CPUs with max spare capacity. + */ + if ((capacity_curr > new_util) && + (capacity_orig - new_util > target_max_spare_cap)) { + target_max_spare_cap = capacity_orig - new_util; + target_cpu = i; + continue; + } + if (target_cpu != -1) + continue; + + + /* + * Case A.3: Backup ACTIVE CPU + * Favor CPUs with: + * - lower utilization due to other tasks + * - lower utilization with the task in + */ + if (wake_util > min_wake_util) + continue; + if (new_util > best_active_util) + continue; + min_wake_util = wake_util; + best_active_util = new_util; + best_active_cpu = i; + continue; } + + /* + * Case B) Non latency sensitive tasks on IDLE CPUs. + * + * Find an optimal backup IDLE CPU for non latency + * sensitive tasks. + * + * Looking for: + * - minimizing the capacity_orig, + * i.e. preferring LITTLE CPUs + * - favoring shallowest idle states + * i.e. 
avoid to wakeup deep-idle CPUs + * + * The following code path is used by non latency + * sensitive tasks if IDLE CPUs are available. If at + * least one of such CPUs are available it sets the + * best_idle_cpu to the most suitable idle CPU to be + * selected. + * + * If idle CPUs are available, favour these CPUs to + * improve performances by spreading tasks. + * Indeed, the energy_diff() computed by the caller + * will take care to ensure the minimization of energy + * consumptions without affecting performance. + */ + if (idle_cpu(i)) { + int idle_idx = idle_get_state_idx(cpu_rq(i)); + + /* Select idle CPU with lower cap_orig */ + if (capacity_orig > best_idle_min_cap_orig) + continue; + + /* + * Skip CPUs in deeper idle state, but only + * if they are also less energy efficient. + * IOW, prefer a deep IDLE LITTLE CPU vs a + * shallow idle big CPU. + */ + if (sysctl_sched_cstate_aware && + best_idle_cstate <= idle_idx) + continue; + + /* Keep track of best idle CPU */ + best_idle_min_cap_orig = capacity_orig; + best_idle_cstate = idle_idx; + best_idle_cpu = i; + continue; + } + + /* + * Case C) Non latency sensitive tasks on ACTIVE CPUs. + * + * Pack tasks in the most energy efficient capacities. + * + * This task packing strategy prefers more energy + * efficient CPUs (i.e. pack on smaller maximum + * capacity CPUs) while also trying to spread tasks to + * run them all at the lower OPP. + * + * This assumes for example that it's more energy + * efficient to run two tasks on two CPUs at a lower + * OPP than packing both on a single CPU but running + * that CPU at an higher OPP. + * + * Thus, this case keep track of the CPU with the + * smallest maximum capacity and highest spare maximum + * capacity. + */ + + /* Favor CPUs with smaller capacity */ + if (capacity_orig > target_capacity) + continue; + + /* Favor CPUs with maximum spare capacity */ + if ((capacity_orig - new_util) < target_max_spare_cap) + continue; + + target_max_spare_cap = capacity_orig - new_util; + target_capacity = capacity_orig; + target_util = new_util; + target_cpu = i; } + } while (sg = sg->next, sg != sd->groups); - if (target_cpu < 0) - target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu; + /* + * For non latency sensitive tasks, cases B and C in the previous loop, + * we pick the best IDLE CPU only if we was not able to find a target + * ACTIVE CPU. + * + * Policies priorities: + * + * - prefer_idle tasks: + * + * a) IDLE CPU available, we return immediately + * b) ACTIVE CPU where task fits and has the bigger maximum spare + * capacity (i.e. target_cpu) + * c) ACTIVE CPU with less contention due to other tasks + * (i.e. best_active_cpu) + * + * - NON prefer_idle tasks: + * + * a) ACTIVE CPU: target_cpu + * b) IDLE CPU: best_idle_cpu + */ + if (target_cpu == -1) + target_cpu = prefer_idle + ? best_active_cpu + : best_idle_cpu; + else + *backup_cpu = prefer_idle + ? 
best_active_cpu + : best_idle_cpu; - if (target_cpu >= 0) { - schedstat_inc(p, se.statistics.nr_wakeups_fbt_count); - schedstat_inc(this_rq(), eas_stats.fbt_count); - } + trace_sched_find_best_target(p, prefer_idle, min_util, cpu, + best_idle_cpu, best_active_cpu, + target_cpu); + + schedstat_inc(p, se.statistics.nr_wakeups_fbt_count); + schedstat_inc(this_rq(), eas_stats.fbt_count); return target_cpu; } @@ -7483,7 +7686,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync) { struct sched_domain *sd; - int target_cpu = prev_cpu, tmp_target; + int target_cpu = prev_cpu, tmp_target, tmp_backup; bool boosted, prefer_idle; schedstat_inc(p, se.statistics.nr_wakeups_secb_attempts); @@ -7508,9 +7711,11 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync prefer_idle = 0; #endif + sync_entity_load_avg(&p->se); + sd = rcu_dereference(per_cpu(sd_ea, prev_cpu)); /* Find a cpu with sufficient capacity */ - tmp_target = find_best_target(p, boosted, prefer_idle); + tmp_target = find_best_target(p, &tmp_backup, boosted, prefer_idle); if (!sd) goto unlock; @@ -7539,10 +7744,15 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync } if (energy_diff(&eenv) >= 0) { - schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); - schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); - target_cpu = prev_cpu; - goto unlock; + /* No energy saving for target_cpu, try backup */ + target_cpu = tmp_backup; + eenv.dst_cpu = target_cpu; + if (tmp_backup < 0 || energy_diff(&eenv) >= 0) { + schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); + schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); + target_cpu = prev_cpu; + goto unlock; + } } schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav); @@ -7584,16 +7794,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f return select_best_cpu(p, prev_cpu, 0, sync); #endif - if (sd_flag & SD_BALANCE_WAKE) { - /* - * do wake_cap unconditionally as it causes task and cpu - * utilization to be synced, and we need that for energy - * aware wakeups - */ - int _wake_cap = wake_cap(p, cpu, prev_cpu); - want_affine = !wake_wide(p) && !_wake_cap + if (sd_flag & SD_BALANCE_WAKE) + want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); - } if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized)) return select_energy_cpu_brute(p, prev_cpu, sync); @@ -9189,6 +9392,38 @@ group_type group_classify(struct sched_group *group, return group_other; } +#ifdef CONFIG_NO_HZ_COMMON +/* + * idle load balancing data + * - used by the nohz balance, but we want it available here + * so that we can see which CPUs have no tick. 
+ */ +static struct { + cpumask_var_t idle_cpus_mask; + atomic_t nr_cpus; + unsigned long next_balance; /* in jiffy units */ +} nohz ____cacheline_aligned; + +static inline void update_cpu_stats_if_tickless(struct rq *rq) +{ + /* only called from update_sg_lb_stats when irqs are disabled */ + if (cpumask_test_cpu(rq->cpu, nohz.idle_cpus_mask)) { + /* rate limit updates to once-per-jiffie at most */ + if (READ_ONCE(jiffies) <= rq->last_load_update_tick) + return; + + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + update_idle_cpu_load(rq); + update_cfs_rq_load_avg(rq->clock_task, &rq->cfs, false); + raw_spin_unlock(&rq->lock); + } +} + +#else +static inline void update_cpu_stats_if_tickless(struct rq *rq) { } +#endif + /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. @@ -9220,6 +9455,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, if (cpu_isolated(i)) continue; + /* if we are entering idle and there are CPUs with + * their tick stopped, do an update for them + */ + if (env->idle == CPU_NEWLY_IDLE) + update_cpu_stats_if_tickless(rq); + /* Bias balancing toward cpus of our domain */ if (local_group) load = target_load(i, load_idx); @@ -10680,11 +10921,6 @@ static inline int on_null_domain(struct rq *rq) * needed, they will kick the idle load balancer, which then does idle * load balancing for all the idle CPUs. */ -static struct { - cpumask_var_t idle_cpus_mask; - atomic_t nr_cpus; - unsigned long next_balance; /* in jiffy units */ -} nohz ____cacheline_aligned; #ifdef CONFIG_SCHED_HMP static inline int find_new_hmp_ilb(int type) @@ -11111,6 +11347,10 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) (!energy_aware() || cpu_overutilized(cpu))) return true; + /* Do idle load balance if there have misfit task */ + if (energy_aware() && rq->misfit_task) + return 1; + return (rq->nr_running >= 2); } diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index e181c87a928d..f56c4da16d0b 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -55,6 +55,8 @@ static inline void walt_migrate_sync_cpu(int cpu) { } static inline void walt_init_cpu_efficiency(void) { } static inline u64 walt_ktime_clock(void) { return 0; } +#define walt_cpu_high_irqload(cpu) false + #endif /* CONFIG_SCHED_WALT */ extern unsigned int walt_disabled; diff --git a/kernel/softirq.c b/kernel/softirq.c index 39ffd41594ce..9029227e5f57 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -234,6 +234,8 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif +#define long_softirq_pending() (local_softirq_pending() & LONG_SOFTIRQ_MASK) +#define defer_for_rt() (long_softirq_pending() && cpupri_check_rt()) asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; @@ -297,6 +299,7 @@ restart: pending = local_softirq_pending(); if (pending) { if (time_before(jiffies, end) && !need_resched() && + !defer_for_rt() && --max_restart) goto restart; @@ -349,7 +352,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (!force_irqthreads) { + if (!force_irqthreads && !defer_for_rt()) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 1a4de0022cc5..ceec77c652b5 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -848,7 +848,8 @@ static int 
alarm_timer_set(struct k_itimer *timr, int flags, * Rate limit to the tick as a hot fix to prevent DOS. Will be * mopped up later. */ - if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) + if (timr->it.alarm.interval.tv64 && + ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) timr->it.alarm.interval = ktime_set(0, TICK_NSEC); exp = timespec_to_ktime(new_setting->it_value); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 34b2a0d5cf1a..eba904bae48c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3535,7 +3535,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) int exclude_mod = 0; int found = 0; int ret; - int clear_filter; + int clear_filter = 0; if (func) { func_g.type = filter_parse_regex(func, len, &func_g.search, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 60d246c4eefa..a579a874045b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1362,7 +1362,7 @@ static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; unsigned *map_cmdline_to_pid; - unsigned *saved_tgids; + unsigned *map_cmdline_to_tgid; unsigned cmdline_num; int cmdline_idx; char *saved_cmdlines; @@ -1396,9 +1396,10 @@ static int allocate_cmdlines_buffer(unsigned int val, return -ENOMEM; } - s->saved_tgids = kmalloc_array(val, sizeof(*s->saved_tgids), - GFP_KERNEL); - if (!s->saved_tgids) { + s->map_cmdline_to_tgid = kmalloc_array(val, + sizeof(*s->map_cmdline_to_tgid), + GFP_KERNEL); + if (!s->map_cmdline_to_tgid) { kfree(s->map_cmdline_to_pid); kfree(s->saved_cmdlines); return -ENOMEM; @@ -1410,8 +1411,8 @@ static int allocate_cmdlines_buffer(unsigned int val, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - memset(s->saved_tgids, 0, - val * sizeof(*s->saved_tgids)); + memset(s->map_cmdline_to_tgid, NO_CMDLINE_MAP, + val * sizeof(*s->map_cmdline_to_tgid)); return 0; } @@ -1577,14 +1578,17 @@ static int trace_save_cmdline(struct task_struct *tsk) if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) return 0; + preempt_disable(); /* * It's not the end of the world if we don't get * the lock, but we also don't want to spin * nor do we want to disable interrupts, * so if we miss here, then better luck next time. 
*/ - if (!arch_spin_trylock(&trace_cmdline_lock)) + if (!arch_spin_trylock(&trace_cmdline_lock)) { + preempt_enable(); return 0; + } idx = savedcmd->map_pid_to_cmdline[tsk->pid]; if (idx == NO_CMDLINE_MAP) { @@ -1607,8 +1611,9 @@ static int trace_save_cmdline(struct task_struct *tsk) } set_cmdline(idx, tsk->comm); - savedcmd->saved_tgids[idx] = tsk->tgid; + savedcmd->map_cmdline_to_tgid[idx] = tsk->tgid; arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); return 1; } @@ -1650,19 +1655,29 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } -int trace_find_tgid(int pid) +static int __find_tgid_locked(int pid) { unsigned map; int tgid; - preempt_disable(); - arch_spin_lock(&trace_cmdline_lock); map = savedcmd->map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) - tgid = savedcmd->saved_tgids[map]; + tgid = savedcmd->map_cmdline_to_tgid[map]; else tgid = -1; + return tgid; +} + +int trace_find_tgid(int pid) +{ + int tgid; + + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + + tgid = __find_tgid_locked(pid); + arch_spin_unlock(&trace_cmdline_lock); preempt_enable(); @@ -3979,10 +3994,15 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, { char buf[64]; int r; + unsigned int n; + preempt_disable(); arch_spin_lock(&trace_cmdline_lock); - r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); + n = savedcmd->cmdline_num; arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + + r = scnprintf(buf, sizeof(buf), "%u\n", n); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -3991,7 +4011,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) { kfree(s->saved_cmdlines); kfree(s->map_cmdline_to_pid); - kfree(s->saved_tgids); + kfree(s->map_cmdline_to_tgid); kfree(s); } @@ -4008,10 +4028,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val) return -ENOMEM; } + preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; savedcmd = s; arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); free_saved_cmdlines_buffer(savedcmd_temp); return 0; @@ -4230,33 +4252,61 @@ tracing_saved_tgids_read(struct file *file, char __user *ubuf, char *file_buf; char *buf; int len = 0; - int pid; int i; + int *pids; + int n = 0; - file_buf = kmalloc(savedcmd->cmdline_num*(16+1+16), GFP_KERNEL); - if (!file_buf) - return -ENOMEM; + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); - buf = file_buf; + pids = kmalloc_array(savedcmd->cmdline_num, 2*sizeof(int), GFP_KERNEL); + if (!pids) { + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + return -ENOMEM; + } for (i = 0; i < savedcmd->cmdline_num; i++) { - int tgid; - int r; + int pid; pid = savedcmd->map_cmdline_to_pid[i]; if (pid == -1 || pid == NO_CMDLINE_MAP) continue; - tgid = trace_find_tgid(pid); - r = sprintf(buf, "%d %d\n", pid, tgid); + pids[n] = pid; + pids[n+1] = __find_tgid_locked(pid); + n += 2; + } + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + + if (n == 0) { + kfree(pids); + return 0; + } + + /* enough to hold max pair of pids + space, lr and nul */ + len = n * 12; + file_buf = kmalloc(len, GFP_KERNEL); + if (!file_buf) { + kfree(pids); + return -ENOMEM; + } + + buf = file_buf; + for (i = 0; i < n && len > 0; i += 2) { + int r; + + r = snprintf(buf, len, "%d %d\n", pids[i], pids[i+1]); buf += r; - len += r; + len -= r; } len = simple_read_from_buffer(ubuf, cnt, ppos, - file_buf, len); + file_buf, buf - file_buf); kfree(file_buf); + kfree(pids); return len; } @@ -6847,6 +6897,7 @@ static 
int instance_rmdir(const char *name) } kfree(tr->topts); + free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr);
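The kernel/resource.c hunk above hides physical address ranges in /proc/iomem from readers without CAP_SYS_ADMIN. A quick userspace check is sketched below; it is not part of the commit and only assumes a standard /proc mount. Run it once as root and once as an unprivileged user and compare the output.

#include <stdio.h>

int main(void)
{
	char line[256];
	int i;
	FILE *f = fopen("/proc/iomem", "r");

	if (!f) {
		perror("fopen /proc/iomem");
		return 1;
	}
	/*
	 * With the patched r_show(), an unprivileged reader should see
	 * zeroed ranges such as "00000000-00000000 : System RAM", while a
	 * CAP_SYS_ADMIN reader still sees the real addresses.
	 */
	for (i = 0; i < 8 && fgets(line, sizeof(line), f); i++)
		fputs(line, stdout);
	fclose(f);
	return 0;
}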

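For the kernel/trace/trace.c rework: the old saved_tgids array becomes map_cmdline_to_tgid, indexed by the same per-command slot as saved_cmdlines, and all users now disable preemption around trace_cmdline_lock. The stand-alone sketch below (array sizes, pids, and helper names are made up) only illustrates the slot-indexed pid to comm/tgid bookkeeping; it leaves out the locking and the reverse map_cmdline_to_pid bookkeeping the real code keeps for slot reuse.

#include <stdio.h>
#include <string.h>

#define CMDLINE_SLOTS 4
#define MAX_PID       32
#define NO_SLOT       (-1)

static int  map_pid_to_slot[MAX_PID];     /* like map_pid_to_cmdline[]   */
static char saved_comm[CMDLINE_SLOTS][16];/* like saved_cmdlines[]       */
static int  map_slot_to_tgid[CMDLINE_SLOTS]; /* like map_cmdline_to_tgid[] */
static int  next_slot;

static void save_cmdline(int pid, int tgid, const char *comm)
{
	int idx = map_pid_to_slot[pid];

	if (idx == NO_SLOT) {
		idx = next_slot++ % CMDLINE_SLOTS; /* slots wrap around */
		map_pid_to_slot[pid] = idx;
	}
	strncpy(saved_comm[idx], comm, sizeof(saved_comm[idx]) - 1);
	map_slot_to_tgid[idx] = tgid;              /* same slot as the comm */
}

static int find_tgid(int pid)
{
	int idx = map_pid_to_slot[pid];

	return idx == NO_SLOT ? -1 : map_slot_to_tgid[idx];
}

int main(void)
{
	memset(map_pid_to_slot, NO_SLOT, sizeof(map_pid_to_slot));

	save_cmdline(10, 10, "app-main");
	save_cmdline(11, 10, "app-worker");
	printf("tgid of pid 11: %d\n", find_tgid(11)); /* 10            */
	printf("tgid of pid 12: %d\n", find_tgid(12)); /* -1 (unknown)  */
	return 0;
}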