Diffstat (limited to 'kernel')
-rw-r--r--   kernel/fork.c                    |   8
-rw-r--r--   kernel/irq/chip.c                |   2
-rw-r--r--   kernel/irq/cpuhotplug.c          |   2
-rw-r--r--   kernel/irq/msi.c                 |   2
-rw-r--r--   kernel/kprobes.c                 |   2
-rw-r--r--   kernel/padata.c                  |   2
-rw-r--r--   kernel/pid_namespace.c           |   2
-rw-r--r--   kernel/sched/core_ctl.c          |  79
-rw-r--r--   kernel/sched/fair.c              |  41
-rw-r--r--   kernel/sched/hmp.c               |  78
-rw-r--r--   kernel/sched/sched.h             |  11
-rw-r--r--   kernel/sched/sched_avg.c         |  40
-rw-r--r--   kernel/time/Makefile             |   1
-rw-r--r--   kernel/time/hrtimer.c            |  87
-rw-r--r--   kernel/time/posix-cpu-timers.c   |   2
-rw-r--r--   kernel/time/timer.c              |  48
-rw-r--r--   kernel/time/timer_list.c         |  10
-rw-r--r--   kernel/time/timer_stats.c        | 425
-rw-r--r--   kernel/trace/trace_kprobe.c      |   5
-rw-r--r--   kernel/workqueue.c               |   2
20 files changed, 229 insertions(+), 620 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 622571d2a833..2845c5bdc8e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1596,11 +1596,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	recalc_sigpending();
 	if (signal_pending(current)) {
-		spin_unlock(&current->sighand->siglock);
-		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
 		goto bad_fork_cancel_cgroup;
 	}
+	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+		retval = -ENOMEM;
+		goto bad_fork_cancel_cgroup;
+	}
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1651,6 +1653,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	return p;
 bad_fork_cancel_cgroup:
+	spin_unlock(&current->sighand->siglock);
+	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p, cgrp_ss_priv);
 bad_fork_free_pid:
 	threadgroup_change_end(current);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9812d9c0d483..e0449956298e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -810,8 +810,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 	if (!desc)
 		return;
-	__irq_do_set_handler(desc, handle, 1, NULL);
 	desc->irq_common_data.handler_data = data;
+	__irq_do_set_handler(desc, handle, 1, NULL);
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index dac3724e4c1e..6c8e154c7384 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -47,7 +47,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	if (!c->irq_set_affinity) {
 		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 	} else {
-		int r = irq_do_set_affinity(d, affinity, false);
+		int r = irq_set_affinity_locked(d, affinity, false);
 		if (r)
 			pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
 					    d->irq, r);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index cd6009006510..41b40f310c28 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -268,7 +268,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	struct msi_domain_ops *ops = info->ops;
 	msi_alloc_info_t arg;
 	struct msi_desc *desc;
-	int i, ret, virq;
+	int i, ret, virq = 0;
 	ret = ops->msi_check(domain, info, dev);
 	if (ret == 0)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d10ab6b9b5e0..695763516908 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -563,7 +563,7 @@ static void kprobe_optimizer(struct work_struct *work)
 }
 /* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
diff --git a/kernel/padata.c b/kernel/padata.c
index 401227e3967c..ecc7b3f452c7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -357,7 +357,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
 	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.cbcpu);
+		free_cpumask_var(pd->cpumask.pcpu);
 		return -ENOMEM;
 	}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..567ecc826bc8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -255,7 +255,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	 * if reparented.
 	 */
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->nr_hashed == init_pids)
 			break;
 		schedule();
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 0b5f2dea18a1..ce15ae7fe76b 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -39,11 +39,13 @@ struct cluster_data {
 	cpumask_t cpu_mask;
 	unsigned int need_cpus;
 	unsigned int task_thres;
+	unsigned int max_nr;
 	s64 need_ts;
 	struct list_head lru;
 	bool pending;
 	spinlock_t pending_lock;
 	bool is_big_cluster;
+	bool enable;
 	int nrrun;
 	bool nrrun_changed;
 	struct task_struct *core_ctl_thread;
@@ -60,6 +62,7 @@ struct cpu_data {
 	struct cluster_data *cluster;
 	struct list_head sib;
 	bool isolated_by_us;
+	unsigned int max_nr;
 };
 static DEFINE_PER_CPU(struct cpu_data, cpu_state);
@@ -244,6 +247,29 @@ static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
 }
+static ssize_t store_enable(struct cluster_data *state,
+			    const char *buf, size_t count)
+{
+	unsigned int val;
+	bool bval;
+
+	if (sscanf(buf, "%u\n", &val) != 1)
+		return -EINVAL;
+
+	bval = !!val;
+	if (bval != state->enable) {
+		state->enable = bval;
+		apply_need(state);
+	}
+
+	return count;
+}
+
+static ssize_t show_enable(const struct cluster_data *state, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n", state->enable);
+}
+
 static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
@@ -372,6 +398,7 @@ core_ctl_attr_ro(need_cpus);
 core_ctl_attr_ro(active_cpus);
 core_ctl_attr_ro(global_state);
 core_ctl_attr_rw(not_preferred);
+core_ctl_attr_rw(enable);
 static struct attribute *default_attrs[] = {
 	&min_cpus.attr,
@@ -381,6 +408,7 @@ static struct attribute *default_attrs[] = {
 	&busy_down_thres.attr,
 	&task_thres.attr,
 	&is_big_cluster.attr,
+	&enable.attr,
 	&need_cpus.attr,
 	&active_cpus.attr,
 	&global_state.attr,
@@ -429,7 +457,6 @@ static struct kobj_type ktype_core_ctl = {
 #define RQ_AVG_TOLERANCE 2
 #define RQ_AVG_DEFAULT_MS 20
-#define NR_RUNNING_TOLERANCE 5
 static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;
 static s64 rq_avg_timestamp_ms;
@@ -437,6 +464,7 @@ static s64 rq_avg_timestamp_ms;
 static void update_running_avg(bool trigger_update)
 {
 	int avg, iowait_avg, big_avg, old_nrrun;
+	int old_max_nr, max_nr, big_max_nr;
 	s64 now;
 	unsigned long flags;
 	struct cluster_data *cluster;
@@ -450,40 +478,23 @@ static void update_running_avg(bool trigger_update)
 		return;
 	}
 	rq_avg_timestamp_ms = now;
-	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);
+	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
+				 &max_nr, &big_max_nr);
 	spin_unlock_irqrestore(&state_lock, flags);
-	/*
-	 * Round up to the next integer if the average nr running tasks
-	 * is within NR_RUNNING_TOLERANCE/100 of the next integer.
-	 * If normal rounding up is used, it will allow a transient task
-	 * to trigger online event. By the time core is onlined, the task
-	 * has finished.
-	 * Rounding to closest suffers same problem because scheduler
-	 * might only provide running stats per jiffy, and a transient
-	 * task could skew the number for one jiffy. If core control
-	 * samples every 2 jiffies, it will observe 0.5 additional running
-	 * average which rounds up to 1 task.
-	 */
-	avg = (avg + NR_RUNNING_TOLERANCE) / 100;
-	big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;
 	for_each_cluster(cluster, index) {
 		if (!cluster->inited)
 			continue;
+
 		old_nrrun = cluster->nrrun;
-		/*
-		 * Big cluster only need to take care of big tasks, but if
-		 * there are not enough big cores, big tasks need to be run
-		 * on little as well. Thus for little's runqueue stat, it
-		 * has to use overall runqueue average, or derive what big
-		 * tasks would have to be run on little. The latter approach
-		 * is not easy to get given core control reacts much slower
-		 * than scheduler, and can't predict scheduler's behavior.
-		 */
+		old_max_nr = cluster->max_nr;
 		cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
-		if (cluster->nrrun != old_nrrun) {
+		cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+		if (cluster->nrrun != old_nrrun ||
+			cluster->max_nr != old_max_nr) {
+
 			if (trigger_update)
 				apply_need(cluster);
 			else
@@ -493,6 +504,7 @@ static void update_running_avg(bool trigger_update)
 	return;
 }
+#define MAX_NR_THRESHOLD 4
 /* adjust needed CPUs based on current runqueue information */
 static unsigned int apply_task_need(const struct cluster_data *cluster,
 				    unsigned int new_need)
@@ -503,7 +515,15 @@ static unsigned int apply_task_need(const struct cluster_data *cluster,
 	/* only unisolate more cores if there are tasks to run */
 	if (cluster->nrrun > new_need)
-		return new_need + 1;
+		new_need = new_need + 1;
+
+	/*
+	 * We don't want tasks to be overcrowded in a cluster.
+	 * If any CPU has more than MAX_NR_THRESHOLD in the last
+	 * window, bring another CPU to help out.
+	 */
+	if (cluster->max_nr > MAX_NR_THRESHOLD)
+		new_need = new_need + 1;
 	return new_need;
 }
@@ -549,7 +569,7 @@ static bool eval_need(struct cluster_data *cluster)
 	spin_lock_irqsave(&state_lock, flags);
-	if (cluster->boost) {
+	if (cluster->boost || !cluster->enable) {
 		need_cpus = cluster->max_cpus;
 	} else {
 		cluster->active_cpus = get_active_cpu_count(cluster);
@@ -1046,6 +1066,7 @@ static int cluster_init(const struct cpumask *mask)
 	cluster->offline_delay_ms = 100;
 	cluster->task_thres = UINT_MAX;
 	cluster->nrrun = cluster->num_cpus;
+	cluster->enable = true;
 	INIT_LIST_HEAD(&cluster->lru);
 	spin_lock_init(&cluster->pending_lock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d96380b35e8..3b6038225c17 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3536,6 +3536,16 @@ kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
 static DEFINE_RAW_SPINLOCK(migration_lock);
+static bool do_migration(int reason, int new_cpu, int cpu)
+{
+	if ((reason == UP_MIGRATION || reason == DOWN_MIGRATION)
+				&& same_cluster(new_cpu, cpu))
+		return false;
+
+	/* Inter cluster high irqload migrations are OK */
+	return new_cpu != cpu;
+}
+
 /*
  * Check if currently running task should be migrated to a better cpu.
 *
@@ -3553,7 +3563,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
 		raw_spin_lock(&migration_lock);
 		new_cpu = select_best_cpu(p, cpu, reason, 0);
-		if (new_cpu != cpu) {
+		if (do_migration(reason, new_cpu, cpu)) {
 			active_balance = kick_active_balance(rq, p, new_cpu);
 			if (active_balance)
 				mark_reserved(new_cpu);
@@ -5102,6 +5112,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
@@ -5492,15 +5522,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
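The do_migration() gate above suppresses UP/DOWN migrations that would stay inside one cluster while still allowing any inter-cluster move. A standalone userspace sketch of that decision table, with same_cluster() stubbed out and the reason values invented purely for illustration (they are not the kernel's definitions):

    #include <assert.h>
    #include <stdbool.h>

    enum { IRQLOAD_MIGRATION, UP_MIGRATION, DOWN_MIGRATION }; /* illustrative values */

    /* Stub: pretend CPUs 0-3 form cluster 0 and CPUs 4-7 form cluster 1. */
    static bool same_cluster(int a, int b) { return (a / 4) == (b / 4); }

    static bool do_migration(int reason, int new_cpu, int cpu)
    {
        if ((reason == UP_MIGRATION || reason == DOWN_MIGRATION) &&
            same_cluster(new_cpu, cpu))
            return false;
        /* inter-cluster high-irqload migrations are OK */
        return new_cpu != cpu;
    }

    int main(void)
    {
        assert(!do_migration(UP_MIGRATION, 1, 0));      /* same cluster: suppressed */
        assert(do_migration(UP_MIGRATION, 4, 0));       /* cross cluster: allowed */
        assert(do_migration(IRQLOAD_MIGRATION, 1, 0));  /* irqload: any other CPU */
        assert(!do_migration(IRQLOAD_MIGRATION, 0, 0)); /* no-op move: rejected */
        return 0;
    }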
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index df47c26ab6d2..ae6876e62c0f 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1602,7 +1602,7 @@ unsigned int nr_eligible_big_tasks(int cpu)
 	int nr_big = rq->hmp_stats.nr_big_tasks;
 	int nr = rq->nr_running;
-	if (cpu_max_possible_capacity(cpu) != max_possible_capacity)
+	if (!is_max_capacity_cpu(cpu))
 		return nr_big;
 	return nr;
@@ -2521,10 +2521,42 @@ static inline u32 predict_and_update_buckets(struct rq *rq,
 	return pred_demand;
 }
-static void update_task_cpu_cycles(struct task_struct *p, int cpu)
+#define THRESH_CC_UPDATE (2 * NSEC_PER_USEC)
+
+/*
+ * Assumes rq_lock is held and wallclock was recorded in the same critical
+ * section as this function's invocation.
+ */
+static inline u64 read_cycle_counter(int cpu, u64 wallclock)
+{
+	struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+	u64 delta;
+
+	if (unlikely(!cluster))
+		return cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+
+	/*
+	 * Why don't we need locking here? Let's say that delta is negative
+	 * because some other CPU happened to update last_cc_update with a
+	 * more recent timestamp. We simply read the counter again in that
+	 * case with no harmful side effects. This can happen if there is an
+	 * FIQ between when we read the wallclock and when we use it here.
+	 */
+	delta = wallclock - atomic64_read(&cluster->last_cc_update);
+	if (delta > THRESH_CC_UPDATE) {
+		atomic64_set(&cluster->cycles,
+			     cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu));
+		atomic64_set(&cluster->last_cc_update, wallclock);
+	}
+
+	return atomic64_read(&cluster->cycles);
+}
+
+static void update_task_cpu_cycles(struct task_struct *p, int cpu,
+				   u64 wallclock)
 {
 	if (use_cycle_counter)
-		p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+		p->cpu_cycles = read_cycle_counter(cpu, wallclock);
 }
 static void
@@ -2542,7 +2574,7 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
 		return;
 	}
-	cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+	cur_cycles = read_cycle_counter(cpu, wallclock);
 	/*
 	 * If current task is idle task and irqtime == 0 CPU was
@@ -2579,7 +2611,8 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
 	trace_sched_get_task_cpu_cycles(cpu, event, rq->cc.cycles, rq->cc.time);
 }
-static int account_busy_for_task_demand(struct task_struct *p, int event)
+static int
+account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
 {
 	/*
 	 * No need to bother updating task demand for exiting tasks
@@ -2598,6 +2631,17 @@ static int account_busy_for_task_demand(struct task_struct *p, int event)
 	    (event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
 		return 0;
+	/*
+	 * TASK_UPDATE can be called on a sleeping task, when it is moved
+	 * between related groups.
+	 */
+	if (event == TASK_UPDATE) {
+		if (rq->curr == p)
+			return 1;
+
+		return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
+	}
+
 	return 1;
 }
@@ -2738,7 +2782,7 @@ static u64 update_task_demand(struct task_struct *p, struct rq *rq,
 	u64 runtime;
 	new_window = mark_start < window_start;
-	if (!account_busy_for_task_demand(p, event)) {
+	if (!account_busy_for_task_demand(rq, p, event)) {
 		if (new_window)
 			/*
 			 * If the time accounted isn't being accounted as
@@ -2822,7 +2866,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
 	update_window_start(rq, wallclock);
 	if (!p->ravg.mark_start) {
-		update_task_cpu_cycles(p, cpu_of(rq));
+		update_task_cpu_cycles(p, cpu_of(rq), wallclock);
 		goto done;
 	}
@@ -2890,7 +2934,7 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
 	if (is_idle_task(curr)) {
 		/* We're here without rq->lock held, IRQ disabled */
 		raw_spin_lock(&rq->lock);
-		update_task_cpu_cycles(curr, cpu);
+		update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
 		raw_spin_unlock(&rq->lock);
 	}
 }
@@ -2935,7 +2979,7 @@ void mark_task_starting(struct task_struct *p)
 	p->ravg.mark_start = p->last_wake_ts = wallclock;
 	p->last_cpu_selected_ts = wallclock;
 	p->last_switch_out_ts = 0;
-	update_task_cpu_cycles(p, cpu_of(rq));
+	update_task_cpu_cycles(p, cpu_of(rq), wallclock);
 }
 void set_window_start(struct rq *rq)
@@ -3548,7 +3592,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
 			 wallclock, 0);
-	update_task_cpu_cycles(p, new_cpu);
+	update_task_cpu_cycles(p, new_cpu, wallclock);
 	new_task = is_new_task(p);
 	/* Protected by rq_lock */
@@ -4303,8 +4347,20 @@ void note_task_waking(struct task_struct *p, u64 wallclock)
 {
 	u64 sleep_time = wallclock - p->last_switch_out_ts;
-	p->last_wake_ts = wallclock;
+	/*
+	 * When a short burst and short sleeping task goes for a long
+	 * sleep, the task's avg_sleep_time gets boosted. It will not
+	 * come below the short_sleep threshold for a lot of time and it
+	 * results in incorrect packing. The idea behind tracking
+	 * avg_sleep_time is to detect if a task is short sleeping
+	 * or not. So limit the sleep time to twice the short sleep
+	 * threshold. For regular long sleeping tasks, the avg_sleep_time
+	 * would be higher than the threshold, and packing happens
+	 * correctly.
+	 */
+	sleep_time = min_t(u64, sleep_time, 2 * sysctl_sched_short_sleep);
 	update_avg(&p->ravg.avg_sleep_time, sleep_time);
+
+	p->last_wake_ts = wallclock;
 }
 #ifdef CONFIG_CGROUP_SCHED
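read_cycle_counter() above rate-limits an expensive per-cluster counter read by caching the last value and timestamp in atomics and tolerating a benign race: a stale or concurrent update just costs one extra refresh. A minimal userspace sketch of the same pattern using C11 atomics; read_hw_counter(), the clock argument, and the threshold are stand-ins, not kernel APIs:

    #include <stdatomic.h>
    #include <stdint.h>

    #define THRESH_NS 2000 /* ~2us, analogous to THRESH_CC_UPDATE */

    static _Atomic uint64_t cached_cycles;
    static _Atomic uint64_t last_update_ns;

    /* Stand-in for the expensive hardware counter read. */
    static uint64_t read_hw_counter(void)
    {
        static uint64_t c;
        return c += 1000;
    }

    static uint64_t read_cycle_counter(uint64_t now_ns)
    {
        uint64_t delta = now_ns - atomic_load(&last_update_ns);

        /*
         * If delta underflows because another thread stored a newer
         * timestamp, it becomes huge and we simply refresh again --
         * a harmless extra read, exactly the race the kernel comment
         * describes.
         */
        if (delta > THRESH_NS) {
            atomic_store(&cached_cycles, read_hw_counter());
            atomic_store(&last_update_ns, now_ns);
        }
        return atomic_load(&cached_cycles);
    }

    int main(void)
    {
        (void)read_cycle_counter(0);
        (void)read_cycle_counter(1000); /* within threshold: served from cache */
        (void)read_cycle_counter(5000); /* past threshold: refreshed */
        return 0;
    }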
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 276a2387f06f..b88f647ea935 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -397,6 +397,8 @@ struct sched_cluster {
 	unsigned int static_cluster_pwr_cost;
 	int notifier_sent;
 	bool wake_up_idle;
+	atomic64_t last_cc_update;
+	atomic64_t cycles;
 };
 extern unsigned long all_cluster_ids[];
@@ -509,7 +511,7 @@ struct cfs_rq {
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -1225,6 +1227,11 @@ static inline bool hmp_capable(void)
 	return max_possible_capacity != min_max_possible_capacity;
 }
+static inline bool is_max_capacity_cpu(int cpu)
+{
+	return cpu_max_possible_capacity(cpu) == max_possible_capacity;
+}
+
 /*
  * 'load' is in reference to "best cpu" at its best frequency.
  * Scale that in reference to a given cpu, accounting for how bad it is
@@ -1601,6 +1608,8 @@ static inline unsigned int nr_eligible_big_tasks(int cpu)
 	return 0;
 }
+static inline bool is_max_capacity_cpu(int cpu) { return true; }
+
 static inline int pct_task_load(struct task_struct *p) { return 0; }
 static inline int cpu_capacity(int cpu)
diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c
index 29d8a26a78ed..ba5a326a9fd8 100644
--- a/kernel/sched/sched_avg.c
+++ b/kernel/sched/sched_avg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, 2015-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -26,11 +26,13 @@ static DEFINE_PER_CPU(u64, nr_prod_sum);
 static DEFINE_PER_CPU(u64, last_time);
 static DEFINE_PER_CPU(u64, nr_big_prod_sum);
 static DEFINE_PER_CPU(u64, nr);
+static DEFINE_PER_CPU(u64, nr_max);
 static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
 static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
 static s64 last_get_time;
+#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
 /**
  * sched_get_nr_running_avg
  * @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -40,7 +42,8 @@ static s64 last_get_time;
  * Obtains the average nr_running value since the last poll.
 * This function may not be called concurrently with itself
 */
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
+void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
+			      unsigned int *max_nr, unsigned int *big_max_nr)
 {
 	int cpu;
 	u64 curr_time = sched_clock();
@@ -50,6 +53,8 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
 	*avg = 0;
 	*iowait_avg = 0;
 	*big_avg = 0;
+	*max_nr = 0;
+	*big_max_nr = 0;
 	if (!diff)
 		return;
@@ -78,17 +83,35 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
 		per_cpu(nr_big_prod_sum, cpu) = 0;
 		per_cpu(iowait_prod_sum, cpu) = 0;
+		if (*max_nr < per_cpu(nr_max, cpu))
+			*max_nr = per_cpu(nr_max, cpu);
+
+		if (is_max_capacity_cpu(cpu)) {
+			if (*big_max_nr < per_cpu(nr_max, cpu))
+				*big_max_nr = per_cpu(nr_max, cpu);
+		}
+
+		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
 		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
 	}
 	diff = curr_time - last_get_time;
 	last_get_time = curr_time;
-	*avg = (int)div64_u64(tmp_avg * 100, diff);
-	*big_avg = (int)div64_u64(tmp_big_avg * 100, diff);
-	*iowait_avg = (int)div64_u64(tmp_iowait * 100, diff);
-
-	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg);
+	/*
+	 * Any task running on the BIG cluster, and BIG tasks running on the
+	 * little cluster, contribute to big_avg. Small or medium tasks can
+	 * also run on the BIG cluster when the co-location and scheduler
+	 * boost features are activated. We don't want these tasks to
+	 * downmigrate to the little cluster when BIG CPUs are available but
+	 * isolated. Round up the average values so that core_ctl
+	 * aggressively unisolates BIG CPUs.
+	 */
+	*avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
+	*big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
+	*iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
+
+	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
+				       *max_nr, *big_max_nr);
 	BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
 	pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
@@ -121,6 +144,9 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
 	BUG_ON((s64)per_cpu(nr, cpu) < 0);
+	if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
+		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
+
 	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
 	per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
 	per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
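DIV64_U64_ROUNDUP() is plain round-up integer division, (X + Y - 1) / Y. A quick standalone check of how rounding up differs from the truncating division it replaces; the numbers are illustrative only, not taken from the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Userspace stand-in: the kernel macro wraps div64_u64() instead. */
    #define DIV64_U64_ROUNDUP(X, Y) (((X) + (Y) - 1) / (Y))

    int main(void)
    {
        /* e.g. a runqueue busy 90 time-units out of a 100-unit window */
        assert(90 / 100 == 0);                    /* truncation: busy CPU counts as 0 tasks */
        assert(DIV64_U64_ROUNDUP(90, 100) == 1);  /* round-up: counts as 1, keeping it unisolated */
        assert(DIV64_U64_ROUNDUP(200, 100) == 2); /* exact multiples are unchanged */
        return 0;
    }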
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 5819ca07a22b..b9b881ebaff3 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -9,7 +9,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_TICK_ONESHOT)		+= tick-oneshot.o tick-sched.o
-obj-$(CONFIG_TIMER_STATS)		+= timer_stats.o
 obj-$(CONFIG_DEBUG_FS)			+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)		+= test_udelay.o
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bf7fc4989e5c..e7c2392666cb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,7 +49,6 @@
 #include <linux/sched/deadline.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
-#include <linux/delay.h>
 #include <asm/uaccess.h>
@@ -782,34 +781,6 @@ void hrtimers_resume(void)
 	clock_was_set_delayed();
 }
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	if (timer->start_site)
-		return;
-	timer->start_site = __builtin_return_address(0);
-	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-	timer->start_pid = current->pid;
-#endif
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-#endif
-}
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	if (likely(!timer_stats_active))
-		return;
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm, 0);
-#endif
-}
-
 /*
  * Counterpart to lock_hrtimer_base above:
  */
@@ -953,7 +924,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 	 * rare case and less expensive than a smp call.
 	 */
 	debug_deactivate(timer);
-	timer_stats_hrtimer_clear_start_info(timer);
 	reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 	if (!restart)
@@ -1012,8 +982,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
-	timer_stats_hrtimer_set_start_info(timer);
-
 	/* Update pinned state */
 	timer->state &= ~HRTIMER_STATE_PINNED;
 	timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
@@ -1154,12 +1122,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	base = hrtimer_clockid_to_base(clock_id);
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
-
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-	timer->start_pid = -1;
-	memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
 }
 /**
@@ -1243,7 +1205,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_write_seqcount_barrier(&cpu_base->seq);
 	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
-	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 	/*
@@ -1631,42 +1592,22 @@ static void init_hrtimers_cpu(int cpu)
 }
 #if defined(CONFIG_HOTPLUG_CPU)
-static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
-				 struct hrtimer_cpu_base *new_base,
-				 unsigned int i,
-				 bool wait,
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+				 struct hrtimer_clock_base *new_base,
 				 bool remove_pinned)
 {
 	struct hrtimer *timer;
 	struct timerqueue_node *node;
 	struct timerqueue_head pinned;
 	int is_pinned;
-	struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i];
-	struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i];
+	bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
 	timerqueue_init_head(&pinned);
-	while ((node = timerqueue_getnext(&old_c_base->active))) {
+	while ((node = timerqueue_getnext(&old_base->active))) {
 		timer = container_of(node, struct hrtimer, node);
-		if (wait) {
-			/* Ensure timers are done running before continuing */
-			while (hrtimer_callback_running(timer)) {
-				raw_spin_unlock(&old_base->lock);
-				raw_spin_unlock(&new_base->lock);
-				cpu_relax();
-				/*
-				 * cpu_relax may just be a barrier. Grant the
-				 * run_hrtimer_list code some time to obtain
-				 * the spinlock.
-				 */
-				udelay(2);
-				raw_spin_lock(&new_base->lock);
-				raw_spin_lock_nested(&old_base->lock,
-						     SINGLE_DEPTH_NESTING);
-			}
-		} else {
+		if (is_hotplug)
 			BUG_ON(hrtimer_callback_running(timer));
-		}
 		debug_deactivate(timer);
 		/*
@@ -1674,7 +1615,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
 		 * timer could be seen as !active and just vanish away
 		 * under us on another CPU
 		 */
-		__remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0);
+		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
 		is_pinned = timer->state & HRTIMER_STATE_PINNED;
 		if (!remove_pinned && is_pinned) {
@@ -1682,7 +1623,7 @@
 			continue;
 		}
-		timer->base = new_c_base;
+		timer->base = new_base;
 		/*
 		 * Enqueue the timers on the new cpu. This does not
 		 * reprogram the event device in case the timer
@@ -1691,7 +1632,7 @@
 		 * sort out already expired timers and reprogram the
 		 * event device.
 		 */
-		enqueue_hrtimer(timer, new_c_base);
+		enqueue_hrtimer(timer, new_base);
 	}
 	/* Re-queue pinned timers for non-hotplug usecase */
 	while ((node = timerqueue_getnext(&pinned))) {
 		timer = container_of(node, struct hrtimer, node);
 		timerqueue_del(&pinned, &timer->node);
-		enqueue_hrtimer(timer, old_c_base);
+		enqueue_hrtimer(timer, old_base);
 	}
 }
-static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
+static void __migrate_hrtimers(int scpu, bool remove_pinned)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
 	unsigned long flags;
@@ -1720,8 +1661,8 @@
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		migrate_hrtimer_list(old_base, new_base, i, wait,
-				     remove_pinned);
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i], remove_pinned);
 	}
 	raw_spin_unlock(&old_base->lock);
@@ -1737,12 +1678,12 @@ static void migrate_hrtimers(int scpu)
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
-	__migrate_hrtimers(scpu, false, true);
+	__migrate_hrtimers(scpu, true);
 }
 void hrtimer_quiesce_cpu(void *cpup)
 {
-	__migrate_hrtimers(*(int *)cpup, true, false);
+	__migrate_hrtimers(*(int *)cpup, false);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..051544aec37c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1250,7 +1250,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
 {
-	unsigned long long now;
+	unsigned long long now = 0;
 	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 75ce9cbc313d..90a82deece45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -500,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 	}
 }
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
-{
-	if (timer->start_site)
-		return;
-
-	timer->start_site = addr;
-	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-	timer->start_pid = current->pid;
-}
-
-static void timer_stats_account_timer(struct timer_list *timer)
-{
-	void *site;
-
-	/*
-	 * start_site can be concurrently reset by
-	 * timer_stats_timer_clear_start_info()
-	 */
-	site = READ_ONCE(timer->start_site);
-	if (likely(!site))
-		return;
-
-	timer_stats_update_stats(timer, timer->start_pid, site,
-				 timer->function, timer->start_comm,
-				 timer->flags);
-}
-
-#else
-static void timer_stats_account_timer(struct timer_list *timer) {}
-#endif
-
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 static struct debug_obj_descr timer_debug_descr;
@@ -734,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
 	timer->entry.pprev = NULL;
 	timer->flags = flags | raw_smp_processor_id();
 	timer->slack = -1;
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-	timer->start_pid = -1;
-	memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
@@ -845,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	unsigned long flags;
 	int ret = 0;
-	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 	base = lock_timer_base(timer, &flags);
@@ -1044,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	struct tvec_base *base;
 	unsigned long flags;
-	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
 	/*
@@ -1089,7 +1050,6 @@ int del_timer(struct timer_list *timer)
 	debug_assert_init(timer);
-	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
 		ret = detach_if_pending(timer, base, true);
@@ -1117,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
 	base = lock_timer_base(timer, &flags);
-	if (base->running_timer != timer) {
-		timer_stats_timer_clear_start_info(timer);
+	if (base->running_timer != timer)
 		ret = detach_if_pending(timer, base, true);
-	}
+
 	spin_unlock_irqrestore(&base->lock, flags);
 	return ret;
@@ -1304,8 +1263,6 @@ static inline void __run_timers(struct tvec_base *base)
 			data = timer->data;
 			irqsafe = timer->flags & TIMER_IRQSAFE;
-			timer_stats_account_timer(timer);
-
 			base->running_timer = timer;
 			detach_expired_timer(timer, base);
@@ -1782,7 +1739,6 @@ static void __init init_timer_cpus(void)
 void __init init_timers(void)
 {
 	init_timer_cpus();
-	init_timer_stats();
 	timer_register_cpu_notifier();
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
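The migrate_hrtimer_list() rework above drains the old clock base, sets pinned timers aside on a local queue, and re-queues them on the source base afterwards unless remove_pinned is set. The same drain-and-partition shape on a plain singly linked list, ignoring the expiry ordering that the real timerqueue maintains (illustrative only):

    #include <stdbool.h>
    #include <stddef.h>

    struct timer {
        struct timer *next;
        bool pinned;
    };

    /* Pop every timer from *src; pinned ones return to *src at the end
     * unless remove_pinned, everything else moves to *dst. */
    static void migrate_list(struct timer **src, struct timer **dst,
                             bool remove_pinned)
    {
        struct timer *pinned_head = NULL;
        struct timer *t;

        while ((t = *src) != NULL) {
            *src = t->next;
            if (!remove_pinned && t->pinned) {
                t->next = pinned_head;  /* set aside, like the local
                                         * 'pinned' timerqueue */
                pinned_head = t;
            } else {
                t->next = *dst;         /* enqueue on the new base */
                *dst = t;
            }
        }

        while ((t = pinned_head) != NULL) { /* re-queue pinned timers */
            pinned_head = t->next;
            t->next = *src;
            *src = t;
        }
    }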
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b288bb3..83aa1f867b97 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -62,21 +62,11 @@ static void
 print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 	    int idx, u64 now)
 {
-#ifdef CONFIG_TIMER_STATS
-	char tmp[TASK_COMM_LEN + 1];
-#endif
 	SEQ_printf(m, " #%d: ", idx);
 	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
 	SEQ_printf(m, ", S:%02x", timer->state);
-#ifdef CONFIG_TIMER_STATS
-	SEQ_printf(m, ", ");
-	print_name_offset(m, timer->start_site);
-	memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
-	tmp[TASK_COMM_LEN] = 0;
-	SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
-#endif
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
 		(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
deleted file mode 100644
index 1adecb4b87c8..000000000000
--- a/kernel/time/timer_stats.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * kernel/time/timer_stats.c
- *
- * Collect timer usage statistics.
- *
- * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- * timer_stats is based on timer_top, a similar functionality which was part of
- * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
- * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
- * on dynamic allocation of the statistics entries and linear search based
- * lookup combined with a global lock, rather than the static array, hash
- * and per-CPU locking which is used by timer_stats. It was written for the
- * pre hrtimer kernel code and therefore did not take hrtimers into account.
- * Nevertheless it provided the base for the timer_stats implementation and
- * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
- * for this effort.
- *
- * timer_top.c is
- *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
- *	Written by Daniel Petrini <d.pensator@gmail.com>
- *	timer_top.c was released under the GNU General Public License version 2
- *
- * We export the addresses and counting of timer functions being called,
- * the pid and cmdline from the owner process if applicable.
- *
- * Start/stop data collection:
- * # echo [1|0] >/proc/timer_stats
- *
- * Display the information collected so far:
- * # cat /proc/timer_stats
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
-
-/*
- * This is our basic unit of interest: a timer expiry event identified
- * by the timer, its start/expire functions and the PID of the task that
- * started the timer. We count the number of times an event happens:
- */
-struct entry {
-	/*
-	 * Hash list:
-	 */
-	struct entry		*next;
-
-	/*
-	 * Hash keys:
-	 */
-	void			*timer;
-	void			*start_func;
-	void			*expire_func;
-	pid_t			pid;
-
-	/*
-	 * Number of timeout events:
-	 */
-	unsigned long		count;
-	u32			flags;
-
-	/*
-	 * We save the command-line string to preserve
-	 * this information past task exit:
-	 */
-	char			comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_RAW_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-int __read_mostly timer_stats_active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS	10
-#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry)						\
-	(((unsigned long)(entry)->timer       ^			\
-	  (unsigned long)(entry)->start_func  ^			\
-	  (unsigned long)(entry)->expire_func ^			\
-	  (unsigned long)(entry)->pid) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
-
-static void reset_entries(void)
-{
-	nr_entries = 0;
-	memset(entries, 0, sizeof(entries));
-	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
-	atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
-	if (nr_entries >= MAX_ENTRIES)
-		return NULL;
-
-	return entries + nr_entries++;
-}
-
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
-	return entry1->timer == entry2->timer &&
-	       entry1->start_func == entry2->start_func &&
-	       entry1->expire_func == entry2->expire_func &&
-	       entry1->pid == entry2->pid;
-}
-
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
-{
-	struct entry **head, *curr, *prev;
-
-	head = tstat_hashentry(entry);
-	curr = *head;
-
-	/*
-	 * The fastpath is when the entry is already hashed,
-	 * we do this with the lookup lock held, but with the
-	 * table lock not held:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			return curr;
-
-		curr = curr->next;
-	}
-	/*
-	 * Slowpath: allocate, set up and link a new hash entry:
-	 */
-	prev = NULL;
-	curr = *head;
-
-	raw_spin_lock(&table_lock);
-	/*
-	 * Make sure we have not raced with another CPU:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			goto out_unlock;
-
-		prev = curr;
-		curr = curr->next;
-	}
-
-	curr = alloc_entry();
-	if (curr) {
-		*curr = *entry;
-		curr->count = 0;
-		curr->next = NULL;
-		memcpy(curr->comm, comm, TASK_COMM_LEN);
-
-		smp_mb(); /* Ensure that curr is initialized before insert */
-
-		if (prev)
-			prev->next = curr;
-		else
-			*head = curr;
-	}
- out_unlock:
-	raw_spin_unlock(&table_lock);
-
-	return curr;
-}
-
-/**
- * timer_stats_update_stats - Update the statistics for a timer.
- * @timer:	pointer to either a timer_list or a hrtimer
- * @pid:	the pid of the task which set up the timer
- * @startf:	pointer to the function which did the timer setup
- * @timerf:	pointer to the timer callback function of the timer
- * @comm:	name of the process which set up the timer
- * @tflags:	The flags field of the timer
- *
- * When the timer is already registered, then the event counter is
- * incremented. Otherwise the timer is registered in a free slot.
- */
-void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-			      void *timerf, char *comm, u32 tflags)
-{
-	/*
-	 * It doesn't matter which lock we take:
-	 */
-	raw_spinlock_t *lock;
-	struct entry *entry, input;
-	unsigned long flags;
-
-	if (likely(!timer_stats_active))
-		return;
-
-	lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
-
-	input.timer = timer;
-	input.start_func = startf;
-	input.expire_func = timerf;
-	input.pid = pid;
-	input.flags = tflags;
-
-	raw_spin_lock_irqsave(lock, flags);
-	if (!timer_stats_active)
-		goto out_unlock;
-
-	entry = tstat_lookup(&input, comm);
-	if (likely(entry))
-		entry->count++;
-	else
-		atomic_inc(&overflow_count);
-
- out_unlock:
-	raw_spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
-	char symname[KSYM_NAME_LEN];
-
-	if (lookup_symbol_name(addr, symname) < 0)
-		seq_printf(m, "<%p>", (void *)addr);
-	else
-		seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
-	struct timespec period;
-	struct entry *entry;
-	unsigned long ms;
-	long events = 0;
-	ktime_t time;
-	int i;
-
-	mutex_lock(&show_mutex);
-	/*
-	 * If still active then calculate up to now:
-	 */
-	if (timer_stats_active)
-		time_stop = ktime_get();
-
-	time = ktime_sub(time_stop, time_start);
-
-	period = ktime_to_timespec(time);
-	ms = period.tv_nsec / 1000000;
-
-	seq_puts(m, "Timer Stats Version: v0.3\n");
-	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
-	if (atomic_read(&overflow_count))
-		seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
-	seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");
-
-	for (i = 0; i < nr_entries; i++) {
-		entry = entries + i;
-		if (entry->flags & TIMER_DEFERRABLE) {
-			seq_printf(m, "%4luD, %5d %-16s ",
-				entry->count, entry->pid, entry->comm);
-		} else {
-			seq_printf(m, " %4lu, %5d %-16s ",
-				entry->count, entry->pid, entry->comm);
-		}
-
-		print_name_offset(m, (unsigned long)entry->start_func);
-		seq_puts(m, " (");
-		print_name_offset(m, (unsigned long)entry->expire_func);
-		seq_puts(m, ")\n");
-
-		events += entry->count;
-	}
-
-	ms += period.tv_sec * 1000;
-	if (!ms)
-		ms = 1;
-
-	if (events && period.tv_sec)
-		seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
-			   events, events * 1000 / ms,
-			   (events * 1000000 / ms) % 1000);
-	else
-		seq_printf(m, "%ld total events\n", events);
-
-	mutex_unlock(&show_mutex);
-
-	return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
-	unsigned long flags;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
-
-		raw_spin_lock_irqsave(lock, flags);
-		/* nothing */
-		raw_spin_unlock_irqrestore(lock, flags);
-	}
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *offs)
-{
-	char ctl[2];
-
-	if (count != 2 || *offs)
-		return -EINVAL;
-
-	if (copy_from_user(ctl, buf, count))
-		return -EFAULT;
-
-	mutex_lock(&show_mutex);
-	switch (ctl[0]) {
-	case '0':
-		if (timer_stats_active) {
-			timer_stats_active = 0;
-			time_stop = ktime_get();
-			sync_access();
-		}
-		break;
-	case '1':
-		if (!timer_stats_active) {
-			reset_entries();
-			time_start = ktime_get();
-			smp_mb();
-			timer_stats_active = 1;
-		}
-		break;
-	default:
-		count = -EINVAL;
-	}
-	mutex_unlock(&show_mutex);
-
-	return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, tstats_show, NULL);
-}
-
-static const struct file_operations tstats_fops = {
-	.open		= tstats_open,
-	.read		= seq_read,
-	.write		= tstats_write,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void __init init_timer_stats(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
-	struct proc_dir_entry *pe;
-
-	pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
-	if (!pe)
-		return -ENOMEM;
-	return 0;
-}
-__initcall(init_tstats_procfs);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c9956440d0e6..12ea4ea619ee 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1471,6 +1471,11 @@ static __init int kprobe_trace_self_tests_init(void)
 end:
 	release_all_trace_kprobes();
+	/*
+	 * Wait for the optimizer work to finish. Otherwise it might fiddle
+	 * with probes in already freed __init text.
+	 */
+	wait_for_kprobe_optimizer();
 	if (warn)
 		pr_cont("NG: Some tests are failed. Please check them.\n");
 	else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c0ab232e3abd..73c018d7df00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1508,8 +1508,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 		return;
 	}
-	timer_stats_timer_set_start_info(&dwork->timer);
-
 	dwork->wq = wq;
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
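The trace_kprobe.c hunk above waits for the optimizer's deferred work before the __init text it may still touch is freed. The same bug class in miniature, as a userspace pthread analogue with invented names: the owner must join (flush) the deferred worker before freeing what it references; freeing first would be the use-after-free the patch prevents.

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_t worker;
    static char *probe_text;

    static void *optimizer(void *arg)
    {
        (void)arg;
        memset(probe_text, 0, 64); /* deferred work touching the buffer */
        return NULL;
    }

    int main(void)
    {
        probe_text = malloc(64);
        pthread_create(&worker, NULL, optimizer, NULL);

        /*
         * Analogue of wait_for_kprobe_optimizer(): wait for the deferred
         * work *before* freeing the memory it may still touch.
         */
        pthread_join(worker, NULL);
        free(probe_text);
        return 0;
    }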