summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/msi.c2
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/padata.c2
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/sched/core_ctl.c79
-rw-r--r--kernel/sched/fair.c41
-rw-r--r--kernel/sched/hmp.c78
-rw-r--r--kernel/sched/sched.h11
-rw-r--r--kernel/sched/sched_avg.c40
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/hrtimer.c87
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/timer.c48
-rw-r--r--kernel/time/timer_list.c10
-rw-r--r--kernel/time/timer_stats.c425
-rw-r--r--kernel/trace/trace_kprobe.c5
-rw-r--r--kernel/workqueue.c2
20 files changed, 229 insertions, 620 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 622571d2a833..2845c5bdc8e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1596,11 +1596,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
recalc_sigpending();
if (signal_pending(current)) {
- spin_unlock(&current->sighand->siglock);
- write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
+ if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+ retval = -ENOMEM;
+ goto bad_fork_cancel_cgroup;
+ }
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1651,6 +1653,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
return p;
bad_fork_cancel_cgroup:
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p, cgrp_ss_priv);
bad_fork_free_pid:
threadgroup_change_end(current);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9812d9c0d483..e0449956298e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -810,8 +810,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
if (!desc)
return;
- __irq_do_set_handler(desc, handle, 1, NULL);
desc->irq_common_data.handler_data = data;
+ __irq_do_set_handler(desc, handle, 1, NULL);
irq_put_desc_busunlock(desc, flags);
}
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index dac3724e4c1e..6c8e154c7384 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -47,7 +47,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
if (!c->irq_set_affinity) {
pr_debug("IRQ%u: unable to set affinity\n", d->irq);
} else {
- int r = irq_do_set_affinity(d, affinity, false);
+ int r = irq_set_affinity_locked(d, affinity, false);
if (r)
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, r);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index cd6009006510..41b40f310c28 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -268,7 +268,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
struct msi_domain_ops *ops = info->ops;
msi_alloc_info_t arg;
struct msi_desc *desc;
- int i, ret, virq;
+ int i, ret, virq = 0;
ret = ops->msi_check(domain, info, dev);
if (ret == 0)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d10ab6b9b5e0..695763516908 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -563,7 +563,7 @@ static void kprobe_optimizer(struct work_struct *work)
}
/* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
{
mutex_lock(&kprobe_mutex);
diff --git a/kernel/padata.c b/kernel/padata.c
index 401227e3967c..ecc7b3f452c7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -357,7 +357,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
- free_cpumask_var(pd->cpumask.cbcpu);
+ free_cpumask_var(pd->cpumask.pcpu);
return -ENOMEM;
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..567ecc826bc8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -255,7 +255,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
* if reparented.
*/
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->nr_hashed == init_pids)
break;
schedule();
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 0b5f2dea18a1..ce15ae7fe76b 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -39,11 +39,13 @@ struct cluster_data {
cpumask_t cpu_mask;
unsigned int need_cpus;
unsigned int task_thres;
+ unsigned int max_nr;
s64 need_ts;
struct list_head lru;
bool pending;
spinlock_t pending_lock;
bool is_big_cluster;
+ bool enable;
int nrrun;
bool nrrun_changed;
struct task_struct *core_ctl_thread;
@@ -60,6 +62,7 @@ struct cpu_data {
struct cluster_data *cluster;
struct list_head sib;
bool isolated_by_us;
+ unsigned int max_nr;
};
static DEFINE_PER_CPU(struct cpu_data, cpu_state);
@@ -244,6 +247,29 @@ static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
}
+static ssize_t store_enable(struct cluster_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+ bool bval;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ bval = !!val;
+ if (bval != state->enable) {
+ state->enable = bval;
+ apply_need(state);
+ }
+
+ return count;
+}
+
+static ssize_t show_enable(const struct cluster_data *state, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%u\n", state->enable);
+}
+
static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
@@ -372,6 +398,7 @@ core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
core_ctl_attr_rw(not_preferred);
+core_ctl_attr_rw(enable);
static struct attribute *default_attrs[] = {
&min_cpus.attr,
@@ -381,6 +408,7 @@ static struct attribute *default_attrs[] = {
&busy_down_thres.attr,
&task_thres.attr,
&is_big_cluster.attr,
+ &enable.attr,
&need_cpus.attr,
&active_cpus.attr,
&global_state.attr,
@@ -429,7 +457,6 @@ static struct kobj_type ktype_core_ctl = {
#define RQ_AVG_TOLERANCE 2
#define RQ_AVG_DEFAULT_MS 20
-#define NR_RUNNING_TOLERANCE 5
static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;
static s64 rq_avg_timestamp_ms;
@@ -437,6 +464,7 @@ static s64 rq_avg_timestamp_ms;
static void update_running_avg(bool trigger_update)
{
int avg, iowait_avg, big_avg, old_nrrun;
+ int old_max_nr, max_nr, big_max_nr;
s64 now;
unsigned long flags;
struct cluster_data *cluster;
@@ -450,40 +478,23 @@ static void update_running_avg(bool trigger_update)
return;
}
rq_avg_timestamp_ms = now;
- sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);
+ sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
+ &max_nr, &big_max_nr);
spin_unlock_irqrestore(&state_lock, flags);
- /*
- * Round up to the next integer if the average nr running tasks
- * is within NR_RUNNING_TOLERANCE/100 of the next integer.
- * If normal rounding up is used, it will allow a transient task
- * to trigger online event. By the time core is onlined, the task
- * has finished.
- * Rounding to closest suffers same problem because scheduler
- * might only provide running stats per jiffy, and a transient
- * task could skew the number for one jiffy. If core control
- * samples every 2 jiffies, it will observe 0.5 additional running
- * average which rounds up to 1 task.
- */
- avg = (avg + NR_RUNNING_TOLERANCE) / 100;
- big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;
-
for_each_cluster(cluster, index) {
if (!cluster->inited)
continue;
+
old_nrrun = cluster->nrrun;
- /*
- * Big cluster only need to take care of big tasks, but if
- * there are not enough big cores, big tasks need to be run
- * on little as well. Thus for little's runqueue stat, it
- * has to use overall runqueue average, or derive what big
- * tasks would have to be run on little. The latter approach
- * is not easy to get given core control reacts much slower
- * than scheduler, and can't predict scheduler's behavior.
- */
+ old_max_nr = cluster->max_nr;
cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
- if (cluster->nrrun != old_nrrun) {
+ cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+ if (cluster->nrrun != old_nrrun ||
+ cluster->max_nr != old_max_nr) {
+
if (trigger_update)
apply_need(cluster);
else
@@ -493,6 +504,7 @@ static void update_running_avg(bool trigger_update)
return;
}
+#define MAX_NR_THRESHOLD 4
/* adjust needed CPUs based on current runqueue information */
static unsigned int apply_task_need(const struct cluster_data *cluster,
unsigned int new_need)
@@ -503,7 +515,15 @@ static unsigned int apply_task_need(const struct cluster_data *cluster,
/* only unisolate more cores if there are tasks to run */
if (cluster->nrrun > new_need)
- return new_need + 1;
+ new_need = new_need + 1;
+
+ /*
+ * We don't want tasks to be overcrowded in a cluster.
+ * If any CPU has more than MAX_NR_THRESHOLD in the last
+ * window, bring another CPU to help out.
+ */
+ if (cluster->max_nr > MAX_NR_THRESHOLD)
+ new_need = new_need + 1;
return new_need;
}
@@ -549,7 +569,7 @@ static bool eval_need(struct cluster_data *cluster)
spin_lock_irqsave(&state_lock, flags);
- if (cluster->boost) {
+ if (cluster->boost || !cluster->enable) {
need_cpus = cluster->max_cpus;
} else {
cluster->active_cpus = get_active_cpu_count(cluster);
@@ -1046,6 +1066,7 @@ static int cluster_init(const struct cpumask *mask)
cluster->offline_delay_ms = 100;
cluster->task_thres = UINT_MAX;
cluster->nrrun = cluster->num_cpus;
+ cluster->enable = true;
INIT_LIST_HEAD(&cluster->lru);
spin_lock_init(&cluster->pending_lock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d96380b35e8..3b6038225c17 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3536,6 +3536,16 @@ kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
static DEFINE_RAW_SPINLOCK(migration_lock);
+static bool do_migration(int reason, int new_cpu, int cpu)
+{
+ if ((reason == UP_MIGRATION || reason == DOWN_MIGRATION)
+ && same_cluster(new_cpu, cpu))
+ return false;
+
+ /* Inter cluster high irqload migrations are OK */
+ return new_cpu != cpu;
+}
+
/*
* Check if currently running task should be migrated to a better cpu.
*
@@ -3553,7 +3563,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
raw_spin_lock(&migration_lock);
new_cpu = select_best_cpu(p, cpu, reason, 0);
- if (new_cpu != cpu) {
+ if (do_migration(reason, new_cpu, cpu)) {
active_balance = kick_active_balance(rq, p, new_cpu);
if (active_balance)
mark_reserved(new_cpu);
@@ -5102,6 +5112,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
if (!cfs_bandwidth_used())
return;
+ /* Synchronize hierarchical throttle counter: */
+ if (unlikely(!cfs_rq->throttle_uptodate)) {
+ struct rq *rq = rq_of(cfs_rq);
+ struct cfs_rq *pcfs_rq;
+ struct task_group *tg;
+
+ cfs_rq->throttle_uptodate = 1;
+
+ /* Get closest up-to-date node, because leaves go first: */
+ for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+ pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+ if (pcfs_rq->throttle_uptodate)
+ break;
+ }
+ if (tg) {
+ cfs_rq->throttle_count = pcfs_rq->throttle_count;
+ cfs_rq->throttled_clock_task = rq_clock_task(rq);
+ }
+ }
+
/* an active group must be handled by the update_curr()->put() path */
if (!cfs_rq->runtime_enabled || cfs_rq->curr)
return;
@@ -5492,15 +5522,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
+ /* Avoid re-evaluating load for this entity: */
+ se = parent_entity(se);
/*
* Bias pick_next to pick a task from this cfs_rq, as
* p is sleeping when it is within its sched_slice.
*/
- if (task_sleep && parent_entity(se))
- set_next_buddy(parent_entity(se));
-
- /* avoid re-evaluating load for this entity */
- se = parent_entity(se);
+ if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+ set_next_buddy(se);
break;
}
flags |= DEQUEUE_SLEEP;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index df47c26ab6d2..ae6876e62c0f 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1602,7 +1602,7 @@ unsigned int nr_eligible_big_tasks(int cpu)
int nr_big = rq->hmp_stats.nr_big_tasks;
int nr = rq->nr_running;
- if (cpu_max_possible_capacity(cpu) != max_possible_capacity)
+ if (!is_max_capacity_cpu(cpu))
return nr_big;
return nr;
@@ -2521,10 +2521,42 @@ static inline u32 predict_and_update_buckets(struct rq *rq,
return pred_demand;
}
-static void update_task_cpu_cycles(struct task_struct *p, int cpu)
+#define THRESH_CC_UPDATE (2 * NSEC_PER_USEC)
+
+/*
+ * Assumes rq_lock is held and wallclock was recorded in the same critical
+ * section as this function's invocation.
+ */
+static inline u64 read_cycle_counter(int cpu, u64 wallclock)
+{
+ struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+ u64 delta;
+
+ if (unlikely(!cluster))
+ return cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+
+ /*
+ * Why don't we need locking here? Let's say that delta is negative
+ * because some other CPU happened to update last_cc_update with a
+ * more recent timestamp. We simply read the conter again in that case
+ * with no harmful side effects. This can happen if there is an FIQ
+ * between when we read the wallclock and when we use it here.
+ */
+ delta = wallclock - atomic64_read(&cluster->last_cc_update);
+ if (delta > THRESH_CC_UPDATE) {
+ atomic64_set(&cluster->cycles,
+ cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu));
+ atomic64_set(&cluster->last_cc_update, wallclock);
+ }
+
+ return atomic64_read(&cluster->cycles);
+}
+
+static void update_task_cpu_cycles(struct task_struct *p, int cpu,
+ u64 wallclock)
{
if (use_cycle_counter)
- p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+ p->cpu_cycles = read_cycle_counter(cpu, wallclock);
}
static void
@@ -2542,7 +2574,7 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
return;
}
- cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+ cur_cycles = read_cycle_counter(cpu, wallclock);
/*
* If current task is idle task and irqtime == 0 CPU was
@@ -2579,7 +2611,8 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
trace_sched_get_task_cpu_cycles(cpu, event, rq->cc.cycles, rq->cc.time);
}
-static int account_busy_for_task_demand(struct task_struct *p, int event)
+static int
+account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
{
/*
* No need to bother updating task demand for exiting tasks
@@ -2598,6 +2631,17 @@ static int account_busy_for_task_demand(struct task_struct *p, int event)
(event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
return 0;
+ /*
+ * TASK_UPDATE can be called on sleeping task, when its moved between
+ * related groups
+ */
+ if (event == TASK_UPDATE) {
+ if (rq->curr == p)
+ return 1;
+
+ return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
+ }
+
return 1;
}
@@ -2738,7 +2782,7 @@ static u64 update_task_demand(struct task_struct *p, struct rq *rq,
u64 runtime;
new_window = mark_start < window_start;
- if (!account_busy_for_task_demand(p, event)) {
+ if (!account_busy_for_task_demand(rq, p, event)) {
if (new_window)
/*
* If the time accounted isn't being accounted as
@@ -2822,7 +2866,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
update_window_start(rq, wallclock);
if (!p->ravg.mark_start) {
- update_task_cpu_cycles(p, cpu_of(rq));
+ update_task_cpu_cycles(p, cpu_of(rq), wallclock);
goto done;
}
@@ -2890,7 +2934,7 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
if (is_idle_task(curr)) {
/* We're here without rq->lock held, IRQ disabled */
raw_spin_lock(&rq->lock);
- update_task_cpu_cycles(curr, cpu);
+ update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
raw_spin_unlock(&rq->lock);
}
}
@@ -2935,7 +2979,7 @@ void mark_task_starting(struct task_struct *p)
p->ravg.mark_start = p->last_wake_ts = wallclock;
p->last_cpu_selected_ts = wallclock;
p->last_switch_out_ts = 0;
- update_task_cpu_cycles(p, cpu_of(rq));
+ update_task_cpu_cycles(p, cpu_of(rq), wallclock);
}
void set_window_start(struct rq *rq)
@@ -3548,7 +3592,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
update_task_ravg(p, task_rq(p), TASK_MIGRATE,
wallclock, 0);
- update_task_cpu_cycles(p, new_cpu);
+ update_task_cpu_cycles(p, new_cpu, wallclock);
new_task = is_new_task(p);
/* Protected by rq_lock */
@@ -4303,8 +4347,20 @@ void note_task_waking(struct task_struct *p, u64 wallclock)
{
u64 sleep_time = wallclock - p->last_switch_out_ts;
- p->last_wake_ts = wallclock;
+ /*
+ * When a short burst and short sleeping task goes for a long
+ * sleep, the task's avg_sleep_time gets boosted. It will not
+ * come below short_sleep threshold for a lot of time and it
+ * results in incorrect packing. The idead behind tracking
+ * avg_sleep_time is to detect if a task is short sleeping
+ * or not. So limit the sleep time to twice the short sleep
+ * threshold. For regular long sleeping tasks, the avg_sleep_time
+ * would be higher than threshold, and packing happens correctly.
+ */
+ sleep_time = min_t(u64, sleep_time, 2 * sysctl_sched_short_sleep);
update_avg(&p->ravg.avg_sleep_time, sleep_time);
+
+ p->last_wake_ts = wallclock;
}
#ifdef CONFIG_CGROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 276a2387f06f..b88f647ea935 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -397,6 +397,8 @@ struct sched_cluster {
unsigned int static_cluster_pwr_cost;
int notifier_sent;
bool wake_up_idle;
+ atomic64_t last_cc_update;
+ atomic64_t cycles;
};
extern unsigned long all_cluster_ids[];
@@ -509,7 +511,7 @@ struct cfs_rq {
u64 throttled_clock, throttled_clock_task;
u64 throttled_clock_task_time;
- int throttled, throttle_count;
+ int throttled, throttle_count, throttle_uptodate;
struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -1225,6 +1227,11 @@ static inline bool hmp_capable(void)
return max_possible_capacity != min_max_possible_capacity;
}
+static inline bool is_max_capacity_cpu(int cpu)
+{
+ return cpu_max_possible_capacity(cpu) == max_possible_capacity;
+}
+
/*
* 'load' is in reference to "best cpu" at its best frequency.
* Scale that in reference to a given cpu, accounting for how bad it is
@@ -1601,6 +1608,8 @@ static inline unsigned int nr_eligible_big_tasks(int cpu)
return 0;
}
+static inline bool is_max_capacity_cpu(int cpu) { return true; }
+
static inline int pct_task_load(struct task_struct *p) { return 0; }
static inline int cpu_capacity(int cpu)
diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c
index 29d8a26a78ed..ba5a326a9fd8 100644
--- a/kernel/sched/sched_avg.c
+++ b/kernel/sched/sched_avg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, 2015-2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -26,11 +26,13 @@ static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr);
+static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
+#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
/**
* sched_get_nr_running_avg
* @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -40,7 +42,8 @@ static s64 last_get_time;
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself
*/
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
+void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
+ unsigned int *max_nr, unsigned int *big_max_nr)
{
int cpu;
u64 curr_time = sched_clock();
@@ -50,6 +53,8 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
*avg = 0;
*iowait_avg = 0;
*big_avg = 0;
+ *max_nr = 0;
+ *big_max_nr = 0;
if (!diff)
return;
@@ -78,17 +83,35 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(iowait_prod_sum, cpu) = 0;
+ if (*max_nr < per_cpu(nr_max, cpu))
+ *max_nr = per_cpu(nr_max, cpu);
+
+ if (is_max_capacity_cpu(cpu)) {
+ if (*big_max_nr < per_cpu(nr_max, cpu))
+ *big_max_nr = per_cpu(nr_max, cpu);
+ }
+
+ per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
diff = curr_time - last_get_time;
last_get_time = curr_time;
- *avg = (int)div64_u64(tmp_avg * 100, diff);
- *big_avg = (int)div64_u64(tmp_big_avg * 100, diff);
- *iowait_avg = (int)div64_u64(tmp_iowait * 100, diff);
-
- trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg);
+ /*
+ * Any task running on BIG cluster and BIG tasks running on little
+ * cluster contributes to big_avg. Small or medium tasks can also
+ * run on BIG cluster when co-location and scheduler boost features
+ * are activated. We don't want these tasks to downmigrate to little
+ * cluster when BIG CPUs are available but isolated. Round up the
+ * average values so that core_ctl aggressively unisolate BIG CPUs.
+ */
+ *avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
+ *big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
+ *iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
+
+ trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
+ *max_nr, *big_max_nr);
BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
@@ -121,6 +144,9 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
BUG_ON((s64)per_cpu(nr, cpu) < 0);
+ if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
+ per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
+
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 5819ca07a22b..b9b881ebaff3 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -9,7 +9,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
-obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bf7fc4989e5c..e7c2392666cb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,7 +49,6 @@
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>
-#include <linux/delay.h>
#include <asm/uaccess.h>
@@ -782,34 +781,6 @@ void hrtimers_resume(void)
clock_was_set_delayed();
}
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- if (timer->start_site)
- return;
- timer->start_site = __builtin_return_address(0);
- memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
- timer->start_pid = current->pid;
-#endif
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
-#endif
-}
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- if (likely(!timer_stats_active))
- return;
- timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
- timer->function, timer->start_comm, 0);
-#endif
-}
-
/*
* Counterpart to lock_hrtimer_base above:
*/
@@ -953,7 +924,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
* rare case and less expensive than a smp call.
*/
debug_deactivate(timer);
- timer_stats_hrtimer_clear_start_info(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
if (!restart)
@@ -1012,8 +982,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
/* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
- timer_stats_hrtimer_set_start_info(timer);
-
/* Update pinned state */
timer->state &= ~HRTIMER_STATE_PINNED;
timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
@@ -1154,12 +1122,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
base = hrtimer_clockid_to_base(clock_id);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
-
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
- timer->start_pid = -1;
- memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
}
/**
@@ -1243,7 +1205,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
raw_write_seqcount_barrier(&cpu_base->seq);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
- timer_stats_account_hrtimer(timer);
fn = timer->function;
/*
@@ -1631,42 +1592,22 @@ static void init_hrtimers_cpu(int cpu)
}
#if defined(CONFIG_HOTPLUG_CPU)
-static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
- struct hrtimer_cpu_base *new_base,
- unsigned int i,
- bool wait,
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+ struct hrtimer_clock_base *new_base,
bool remove_pinned)
{
struct hrtimer *timer;
struct timerqueue_node *node;
struct timerqueue_head pinned;
int is_pinned;
- struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i];
- struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i];
+ bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
timerqueue_init_head(&pinned);
- while ((node = timerqueue_getnext(&old_c_base->active))) {
+ while ((node = timerqueue_getnext(&old_base->active))) {
timer = container_of(node, struct hrtimer, node);
- if (wait) {
- /* Ensure timers are done running before continuing */
- while (hrtimer_callback_running(timer)) {
- raw_spin_unlock(&old_base->lock);
- raw_spin_unlock(&new_base->lock);
- cpu_relax();
- /*
- * cpu_relax may just be a barrier. Grant the
- * run_hrtimer_list code some time to obtain the
- * spinlock.
- */
- udelay(2);
- raw_spin_lock(&new_base->lock);
- raw_spin_lock_nested(&old_base->lock,
- SINGLE_DEPTH_NESTING);
- }
- } else {
+ if (is_hotplug)
BUG_ON(hrtimer_callback_running(timer));
- }
debug_deactivate(timer);
/*
@@ -1674,7 +1615,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
* timer could be seen as !active and just vanish away
* under us on another CPU
*/
- __remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0);
+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
is_pinned = timer->state & HRTIMER_STATE_PINNED;
if (!remove_pinned && is_pinned) {
@@ -1682,7 +1623,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
continue;
}
- timer->base = new_c_base;
+ timer->base = new_base;
/*
* Enqueue the timers on the new cpu. This does not
* reprogram the event device in case the timer
@@ -1691,7 +1632,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
* sort out already expired timers and reprogram the
* event device.
*/
- enqueue_hrtimer(timer, new_c_base);
+ enqueue_hrtimer(timer, new_base);
}
/* Re-queue pinned timers for non-hotplug usecase */
@@ -1699,11 +1640,11 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
timer = container_of(node, struct hrtimer, node);
timerqueue_del(&pinned, &timer->node);
- enqueue_hrtimer(timer, old_c_base);
+ enqueue_hrtimer(timer, old_base);
}
}
-static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
+static void __migrate_hrtimers(int scpu, bool remove_pinned)
{
struct hrtimer_cpu_base *old_base, *new_base;
unsigned long flags;
@@ -1720,8 +1661,8 @@ static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- migrate_hrtimer_list(old_base, new_base, i, wait,
- remove_pinned);
+ migrate_hrtimer_list(&old_base->clock_base[i],
+ &new_base->clock_base[i], remove_pinned);
}
raw_spin_unlock(&old_base->lock);
@@ -1737,12 +1678,12 @@ static void migrate_hrtimers(int scpu)
BUG_ON(cpu_online(scpu));
tick_cancel_sched_timer(scpu);
- __migrate_hrtimers(scpu, false, true);
+ __migrate_hrtimers(scpu, true);
}
void hrtimer_quiesce_cpu(void *cpup)
{
- __migrate_hrtimers(*(int *)cpup, true, false);
+ __migrate_hrtimers(*(int *)cpup, false);
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..051544aec37c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1250,7 +1250,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
- unsigned long long now;
+ unsigned long long now = 0;
WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 75ce9cbc313d..90a82deece45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -500,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
}
}
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
-{
- if (timer->start_site)
- return;
-
- timer->start_site = addr;
- memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
- timer->start_pid = current->pid;
-}
-
-static void timer_stats_account_timer(struct timer_list *timer)
-{
- void *site;
-
- /*
- * start_site can be concurrently reset by
- * timer_stats_timer_clear_start_info()
- */
- site = READ_ONCE(timer->start_site);
- if (likely(!site))
- return;
-
- timer_stats_update_stats(timer, timer->start_pid, site,
- timer->function, timer->start_comm,
- timer->flags);
-}
-
-#else
-static void timer_stats_account_timer(struct timer_list *timer) {}
-#endif
-
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
static struct debug_obj_descr timer_debug_descr;
@@ -734,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
timer->entry.pprev = NULL;
timer->flags = flags | raw_smp_processor_id();
timer->slack = -1;
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
- timer->start_pid = -1;
- memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
@@ -845,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
unsigned long flags;
int ret = 0;
- timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
base = lock_timer_base(timer, &flags);
@@ -1044,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
struct tvec_base *base;
unsigned long flags;
- timer_stats_timer_set_start_info(timer);
BUG_ON(timer_pending(timer) || !timer->function);
/*
@@ -1089,7 +1050,6 @@ int del_timer(struct timer_list *timer)
debug_assert_init(timer);
- timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
ret = detach_if_pending(timer, base, true);
@@ -1117,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
base = lock_timer_base(timer, &flags);
- if (base->running_timer != timer) {
- timer_stats_timer_clear_start_info(timer);
+ if (base->running_timer != timer)
ret = detach_if_pending(timer, base, true);
- }
+
spin_unlock_irqrestore(&base->lock, flags);
return ret;
@@ -1304,8 +1263,6 @@ static inline void __run_timers(struct tvec_base *base)
data = timer->data;
irqsafe = timer->flags & TIMER_IRQSAFE;
- timer_stats_account_timer(timer);
-
base->running_timer = timer;
detach_expired_timer(timer, base);
@@ -1782,7 +1739,6 @@ static void __init init_timer_cpus(void)
void __init init_timers(void)
{
init_timer_cpus();
- init_timer_stats();
timer_register_cpu_notifier();
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b288bb3..83aa1f867b97 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -62,21 +62,11 @@ static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
-#ifdef CONFIG_TIMER_STATS
- char tmp[TASK_COMM_LEN + 1];
-#endif
SEQ_printf(m, " #%d: ", idx);
print_name_offset(m, taddr);
SEQ_printf(m, ", ");
print_name_offset(m, timer->function);
SEQ_printf(m, ", S:%02x", timer->state);
-#ifdef CONFIG_TIMER_STATS
- SEQ_printf(m, ", ");
- print_name_offset(m, timer->start_site);
- memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
- tmp[TASK_COMM_LEN] = 0;
- SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
-#endif
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
deleted file mode 100644
index 1adecb4b87c8..000000000000
--- a/kernel/time/timer_stats.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * kernel/time/timer_stats.c
- *
- * Collect timer usage statistics.
- *
- * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- * timer_stats is based on timer_top, a similar functionality which was part of
- * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
- * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
- * on dynamic allocation of the statistics entries and linear search based
- * lookup combined with a global lock, rather than the static array, hash
- * and per-CPU locking which is used by timer_stats. It was written for the
- * pre hrtimer kernel code and therefore did not take hrtimers into account.
- * Nevertheless it provided the base for the timer_stats implementation and
- * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
- * for this effort.
- *
- * timer_top.c is
- * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
- * Written by Daniel Petrini <d.pensator@gmail.com>
- * timer_top.c was released under the GNU General Public License version 2
- *
- * We export the addresses and counting of timer functions being called,
- * the pid and cmdline from the owner process if applicable.
- *
- * Start/stop data collection:
- * # echo [1|0] >/proc/timer_stats
- *
- * Display the information collected so far:
- * # cat /proc/timer_stats
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
-
-/*
- * This is our basic unit of interest: a timer expiry event identified
- * by the timer, its start/expire functions and the PID of the task that
- * started the timer. We count the number of times an event happens:
- */
-struct entry {
- /*
- * Hash list:
- */
- struct entry *next;
-
- /*
- * Hash keys:
- */
- void *timer;
- void *start_func;
- void *expire_func;
- pid_t pid;
-
- /*
- * Number of timeout events:
- */
- unsigned long count;
- u32 flags;
-
- /*
- * We save the command-line string to preserve
- * this information past task exit:
- */
- char comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_RAW_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-int __read_mostly timer_stats_active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS 10
-#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry) \
- (((unsigned long)(entry)->timer ^ \
- (unsigned long)(entry)->start_func ^ \
- (unsigned long)(entry)->expire_func ^ \
- (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
-
-static void reset_entries(void)
-{
- nr_entries = 0;
- memset(entries, 0, sizeof(entries));
- memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
- atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
- if (nr_entries >= MAX_ENTRIES)
- return NULL;
-
- return entries + nr_entries++;
-}
-
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
- return entry1->timer == entry2->timer &&
- entry1->start_func == entry2->start_func &&
- entry1->expire_func == entry2->expire_func &&
- entry1->pid == entry2->pid;
-}
-
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
-{
- struct entry **head, *curr, *prev;
-
- head = tstat_hashentry(entry);
- curr = *head;
-
- /*
- * The fastpath is when the entry is already hashed,
- * we do this with the lookup lock held, but with the
- * table lock not held:
- */
- while (curr) {
- if (match_entries(curr, entry))
- return curr;
-
- curr = curr->next;
- }
- /*
- * Slowpath: allocate, set up and link a new hash entry:
- */
- prev = NULL;
- curr = *head;
-
- raw_spin_lock(&table_lock);
- /*
- * Make sure we have not raced with another CPU:
- */
- while (curr) {
- if (match_entries(curr, entry))
- goto out_unlock;
-
- prev = curr;
- curr = curr->next;
- }
-
- curr = alloc_entry();
- if (curr) {
- *curr = *entry;
- curr->count = 0;
- curr->next = NULL;
- memcpy(curr->comm, comm, TASK_COMM_LEN);
-
- smp_mb(); /* Ensure that curr is initialized before insert */
-
- if (prev)
- prev->next = curr;
- else
- *head = curr;
- }
- out_unlock:
- raw_spin_unlock(&table_lock);
-
- return curr;
-}
-
-/**
- * timer_stats_update_stats - Update the statistics for a timer.
- * @timer: pointer to either a timer_list or a hrtimer
- * @pid: the pid of the task which set up the timer
- * @startf: pointer to the function which did the timer setup
- * @timerf: pointer to the timer callback function of the timer
- * @comm: name of the process which set up the timer
- * @tflags: The flags field of the timer
- *
- * When the timer is already registered, then the event counter is
- * incremented. Otherwise the timer is registered in a free slot.
- */
-void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
- void *timerf, char *comm, u32 tflags)
-{
- /*
- * It doesn't matter which lock we take:
- */
- raw_spinlock_t *lock;
- struct entry *entry, input;
- unsigned long flags;
-
- if (likely(!timer_stats_active))
- return;
-
- lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
-
- input.timer = timer;
- input.start_func = startf;
- input.expire_func = timerf;
- input.pid = pid;
- input.flags = tflags;
-
- raw_spin_lock_irqsave(lock, flags);
- if (!timer_stats_active)
- goto out_unlock;
-
- entry = tstat_lookup(&input, comm);
- if (likely(entry))
- entry->count++;
- else
- atomic_inc(&overflow_count);
-
- out_unlock:
- raw_spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
- char symname[KSYM_NAME_LEN];
-
- if (lookup_symbol_name(addr, symname) < 0)
- seq_printf(m, "<%p>", (void *)addr);
- else
- seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
- struct timespec period;
- struct entry *entry;
- unsigned long ms;
- long events = 0;
- ktime_t time;
- int i;
-
- mutex_lock(&show_mutex);
- /*
- * If still active then calculate up to now:
- */
- if (timer_stats_active)
- time_stop = ktime_get();
-
- time = ktime_sub(time_stop, time_start);
-
- period = ktime_to_timespec(time);
- ms = period.tv_nsec / 1000000;
-
- seq_puts(m, "Timer Stats Version: v0.3\n");
- seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
- if (atomic_read(&overflow_count))
- seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
- seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");
-
- for (i = 0; i < nr_entries; i++) {
- entry = entries + i;
- if (entry->flags & TIMER_DEFERRABLE) {
- seq_printf(m, "%4luD, %5d %-16s ",
- entry->count, entry->pid, entry->comm);
- } else {
- seq_printf(m, " %4lu, %5d %-16s ",
- entry->count, entry->pid, entry->comm);
- }
-
- print_name_offset(m, (unsigned long)entry->start_func);
- seq_puts(m, " (");
- print_name_offset(m, (unsigned long)entry->expire_func);
- seq_puts(m, ")\n");
-
- events += entry->count;
- }
-
- ms += period.tv_sec * 1000;
- if (!ms)
- ms = 1;
-
- if (events && period.tv_sec)
- seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
- events, events * 1000 / ms,
- (events * 1000000 / ms) % 1000);
- else
- seq_printf(m, "%ld total events\n", events);
-
- mutex_unlock(&show_mutex);
-
- return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
- unsigned long flags;
- int cpu;
-
- for_each_online_cpu(cpu) {
- raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
-
- raw_spin_lock_irqsave(lock, flags);
- /* nothing */
- raw_spin_unlock_irqrestore(lock, flags);
- }
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
- size_t count, loff_t *offs)
-{
- char ctl[2];
-
- if (count != 2 || *offs)
- return -EINVAL;
-
- if (copy_from_user(ctl, buf, count))
- return -EFAULT;
-
- mutex_lock(&show_mutex);
- switch (ctl[0]) {
- case '0':
- if (timer_stats_active) {
- timer_stats_active = 0;
- time_stop = ktime_get();
- sync_access();
- }
- break;
- case '1':
- if (!timer_stats_active) {
- reset_entries();
- time_start = ktime_get();
- smp_mb();
- timer_stats_active = 1;
- }
- break;
- default:
- count = -EINVAL;
- }
- mutex_unlock(&show_mutex);
-
- return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, tstats_show, NULL);
-}
-
-static const struct file_operations tstats_fops = {
- .open = tstats_open,
- .read = seq_read,
- .write = tstats_write,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void __init init_timer_stats(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
- struct proc_dir_entry *pe;
-
- pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
- if (!pe)
- return -ENOMEM;
- return 0;
-}
-__initcall(init_tstats_procfs);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c9956440d0e6..12ea4ea619ee 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1471,6 +1471,11 @@ static __init int kprobe_trace_self_tests_init(void)
end:
release_all_trace_kprobes();
+ /*
+ * Wait for the optimizer work to finish. Otherwise it might fiddle
+ * with probes in already freed __init text.
+ */
+ wait_for_kprobe_optimizer();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c0ab232e3abd..73c018d7df00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1508,8 +1508,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
return;
}
- timer_stats_timer_set_start_info(&dwork->timer);
-
dwork->wq = wq;
dwork->cpu = cpu;
timer->expires = jiffies + delay;