20 files changed, 229 insertions, 620 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 622571d2a833..2845c5bdc8e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1596,11 +1596,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	*/
 	recalc_sigpending();
 	if (signal_pending(current)) {
-		spin_unlock(&current->sighand->siglock);
-		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
 		goto bad_fork_cancel_cgroup;
 	}
+	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+		retval = -ENOMEM;
+		goto bad_fork_cancel_cgroup;
+	}
 
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1651,6 +1653,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	return p;
 
 bad_fork_cancel_cgroup:
+	spin_unlock(&current->sighand->siglock);
+	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p, cgrp_ss_priv);
 bad_fork_free_pid:
 	threadgroup_change_end(current);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9812d9c0d483..e0449956298e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -810,8 +810,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 	if (!desc)
 		return;
 
-	__irq_do_set_handler(desc, handle, 1, NULL);
 	desc->irq_common_data.handler_data = data;
+	__irq_do_set_handler(desc, handle, 1, NULL);
 
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index dac3724e4c1e..6c8e154c7384 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -47,7 +47,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	if (!c->irq_set_affinity) {
 		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 	} else {
-		int r = irq_do_set_affinity(d, affinity, false);
+		int r = irq_set_affinity_locked(d, affinity, false);
 		if (r)
 			pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
 					    d->irq, r);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index cd6009006510..41b40f310c28 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -268,7 +268,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	struct msi_domain_ops *ops = info->ops;
 	msi_alloc_info_t arg;
 	struct msi_desc *desc;
-	int i, ret, virq;
+	int i, ret, virq = 0;
 
 	ret = ops->msi_check(domain, info, dev);
 	if (ret == 0)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d10ab6b9b5e0..695763516908 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -563,7 +563,7 @@ static void kprobe_optimizer(struct work_struct *work)
 }
 
 /* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
 
diff --git a/kernel/padata.c b/kernel/padata.c
index 401227e3967c..ecc7b3f452c7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -357,7 +357,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
 
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
 	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.cbcpu);
+		free_cpumask_var(pd->cpumask.pcpu);
 		return -ENOMEM;
 	}
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..567ecc826bc8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -255,7 +255,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	 * if reparented.
 	 */
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->nr_hashed == init_pids)
 			break;
 		schedule();
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 0b5f2dea18a1..ce15ae7fe76b 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -39,11 +39,13 @@ struct cluster_data {
 	cpumask_t cpu_mask;
 	unsigned int need_cpus;
 	unsigned int task_thres;
+	unsigned int max_nr;
 	s64 need_ts;
 	struct list_head lru;
 	bool pending;
 	spinlock_t pending_lock;
 	bool is_big_cluster;
+	bool enable;
 	int nrrun;
 	bool nrrun_changed;
 	struct task_struct *core_ctl_thread;
@@ -60,6 +62,7 @@ struct cpu_data {
 	struct cluster_data *cluster;
 	struct list_head sib;
 	bool isolated_by_us;
+	unsigned int max_nr;
 };
 
 static DEFINE_PER_CPU(struct cpu_data, cpu_state);
@@ -244,6 +247,29 @@ static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
 }
 
+static ssize_t store_enable(struct cluster_data *state,
+				const char *buf, size_t count)
+{
+	unsigned int val;
+	bool bval;
+
+	if (sscanf(buf, "%u\n", &val) != 1)
+		return -EINVAL;
+
+	bval = !!val;
+	if (bval != state->enable) {
+		state->enable = bval;
+		apply_need(state);
+	}
+
+	return count;
+}
+
+static ssize_t show_enable(const struct cluster_data *state, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n", state->enable);
+}
+
 static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
@@ -372,6 +398,7 @@ core_ctl_attr_ro(need_cpus);
 core_ctl_attr_ro(active_cpus);
 core_ctl_attr_ro(global_state);
 core_ctl_attr_rw(not_preferred);
+core_ctl_attr_rw(enable);
 
 static struct attribute *default_attrs[] = {
 	&min_cpus.attr,
@@ -381,6 +408,7 @@ static struct attribute *default_attrs[] = {
 	&busy_down_thres.attr,
 	&task_thres.attr,
 	&is_big_cluster.attr,
+	&enable.attr,
 	&need_cpus.attr,
 	&active_cpus.attr,
 	&global_state.attr,
@@ -429,7 +457,6 @@ static struct kobj_type ktype_core_ctl = {
 
 #define RQ_AVG_TOLERANCE 2
 #define RQ_AVG_DEFAULT_MS 20
-#define NR_RUNNING_TOLERANCE 5
 static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;
 
 static s64 rq_avg_timestamp_ms;
@@ -437,6 +464,7 @@ static s64 rq_avg_timestamp_ms;
 static void update_running_avg(bool trigger_update)
 {
 	int avg, iowait_avg, big_avg, old_nrrun;
+	int old_max_nr, max_nr, big_max_nr;
 	s64 now;
 	unsigned long flags;
 	struct cluster_data *cluster;
@@ -450,40 +478,23 @@ static void update_running_avg(bool trigger_update)
 		return;
 	}
 	rq_avg_timestamp_ms = now;
-	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);
+	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
+				 &max_nr, &big_max_nr);
 
 	spin_unlock_irqrestore(&state_lock, flags);
 
-	/*
-	 * Round up to the next integer if the average nr running tasks
-	 * is within NR_RUNNING_TOLERANCE/100 of the next integer.
-	 * If normal rounding up is used, it will allow a transient task
-	 * to trigger online event. By the time core is onlined, the task
-	 * has finished.
-	 * Rounding to closest suffers same problem because scheduler
-	 * might only provide running stats per jiffy, and a transient
-	 * task could skew the number for one jiffy. If core control
-	 * samples every 2 jiffies, it will observe 0.5 additional running
-	 * average which rounds up to 1 task.
-	 */
-	avg = (avg + NR_RUNNING_TOLERANCE) / 100;
-	big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;
-
 	for_each_cluster(cluster, index) {
 		if (!cluster->inited)
 			continue;
+
 		old_nrrun = cluster->nrrun;
-		/*
-		 * Big cluster only need to take care of big tasks, but if
-		 * there are not enough big cores, big tasks need to be run
-		 * on little as well. Thus for little's runqueue stat, it
-		 * has to use overall runqueue average, or derive what big
-		 * tasks would have to be run on little. The latter approach
-		 * is not easy to get given core control reacts much slower
-		 * than scheduler, and can't predict scheduler's behavior.
-		 */
+		old_max_nr = cluster->max_nr;
 		cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
-		if (cluster->nrrun != old_nrrun) {
+		cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+		if (cluster->nrrun != old_nrrun ||
+			cluster->max_nr != old_max_nr) {
+
 			if (trigger_update)
 				apply_need(cluster);
 			else
@@ -493,6 +504,7 @@ static void update_running_avg(bool trigger_update)
 	return;
 }
 
+#define MAX_NR_THRESHOLD	4
 /* adjust needed CPUs based on current runqueue information */
 static unsigned int apply_task_need(const struct cluster_data *cluster,
 				    unsigned int new_need)
@@ -503,7 +515,15 @@ static unsigned int apply_task_need(const struct cluster_data *cluster,
 
 	/* only unisolate more cores if there are tasks to run */
 	if (cluster->nrrun > new_need)
-		return new_need + 1;
+		new_need = new_need + 1;
+
+	/*
+	 * We don't want tasks to be overcrowded in a cluster.
+	 * If any CPU has more than MAX_NR_THRESHOLD in the last
+	 * window, bring another CPU to help out.
+	 */
+	if (cluster->max_nr > MAX_NR_THRESHOLD)
+		new_need = new_need + 1;
 
 	return new_need;
 }
@@ -549,7 +569,7 @@ static bool eval_need(struct cluster_data *cluster)
 
 	spin_lock_irqsave(&state_lock, flags);
 
-	if (cluster->boost) {
+	if (cluster->boost || !cluster->enable) {
 		need_cpus = cluster->max_cpus;
 	} else {
 		cluster->active_cpus = get_active_cpu_count(cluster);
@@ -1046,6 +1066,7 @@ static int cluster_init(const struct cpumask *mask)
 	cluster->offline_delay_ms = 100;
 	cluster->task_thres = UINT_MAX;
 	cluster->nrrun = cluster->num_cpus;
+	cluster->enable = true;
 	INIT_LIST_HEAD(&cluster->lru);
 	spin_lock_init(&cluster->pending_lock);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d96380b35e8..3b6038225c17 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3536,6 +3536,16 @@ kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
 
 static DEFINE_RAW_SPINLOCK(migration_lock);
 
+static bool do_migration(int reason, int new_cpu, int cpu)
+{
+	if ((reason == UP_MIGRATION || reason == DOWN_MIGRATION)
+				&& same_cluster(new_cpu, cpu))
+		return false;
+
+	/* Inter cluster high irqload migrations are OK */
+	return new_cpu != cpu;
+}
+
 /*
  * Check if currently running task should be migrated to a better cpu.
  *
@@ -3553,7 +3563,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
 	raw_spin_lock(&migration_lock);
 	new_cpu = select_best_cpu(p, cpu, reason, 0);
 
-	if (new_cpu != cpu) {
+	if (do_migration(reason, new_cpu, cpu)) {
 		active_balance = kick_active_balance(rq, p, new_cpu);
 		if (active_balance)
 			mark_reserved(new_cpu);
@@ -5102,6 +5112,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
@@ -5492,15 +5522,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index df47c26ab6d2..ae6876e62c0f 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1602,7 +1602,7 @@ unsigned int nr_eligible_big_tasks(int cpu)
 	int nr_big = rq->hmp_stats.nr_big_tasks;
 	int nr = rq->nr_running;
 
-	if (cpu_max_possible_capacity(cpu) != max_possible_capacity)
+	if (!is_max_capacity_cpu(cpu))
 		return nr_big;
 
 	return nr;
@@ -2521,10 +2521,42 @@ static inline u32 predict_and_update_buckets(struct rq *rq,
 	return pred_demand;
 }
 
-static void update_task_cpu_cycles(struct task_struct *p, int cpu)
+#define THRESH_CC_UPDATE (2 * NSEC_PER_USEC)
+
+/*
+ * Assumes rq_lock is held and wallclock was recorded in the same critical
+ * section as this function's invocation.
+ */
+static inline u64 read_cycle_counter(int cpu, u64 wallclock)
+{
+	struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+	u64 delta;
+
+	if (unlikely(!cluster))
+		return cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+
+	/*
+	 * Why don't we need locking here? Let's say that delta is negative
+	 * because some other CPU happened to update last_cc_update with a
+	 * more recent timestamp. We simply read the conter again in that case
+	 * with no harmful side effects. This can happen if there is an FIQ
+	 * between when we read the wallclock and when we use it here.
+	 */
+	delta = wallclock - atomic64_read(&cluster->last_cc_update);
+	if (delta > THRESH_CC_UPDATE) {
+		atomic64_set(&cluster->cycles,
+			     cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu));
+		atomic64_set(&cluster->last_cc_update, wallclock);
+	}
+
+	return atomic64_read(&cluster->cycles);
+}
+
+static void update_task_cpu_cycles(struct task_struct *p, int cpu,
+				   u64 wallclock)
 {
 	if (use_cycle_counter)
-		p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+		p->cpu_cycles = read_cycle_counter(cpu, wallclock);
 }
 
 static void
@@ -2542,7 +2574,7 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
 		return;
 	}
 
-	cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+	cur_cycles = read_cycle_counter(cpu, wallclock);
 
 	/*
 	 * If current task is idle task and irqtime == 0 CPU was
@@ -2579,7 +2611,8 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
 	trace_sched_get_task_cpu_cycles(cpu, event, rq->cc.cycles, rq->cc.time);
 }
 
-static int account_busy_for_task_demand(struct task_struct *p, int event)
+static int
+account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
 {
 	/*
 	 * No need to bother updating task demand for exiting tasks
@@ -2598,6 +2631,17 @@ static int account_busy_for_task_demand(struct task_struct *p, int event)
 			 (event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
 		return 0;
 
+	/*
+	 * TASK_UPDATE can be called on sleeping task, when its moved between
+	 * related groups
+	 */
+	if (event == TASK_UPDATE) {
+		if (rq->curr == p)
+			return 1;
+
+		return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
+	}
+
 	return 1;
 }
 
@@ -2738,7 +2782,7 @@ static u64 update_task_demand(struct task_struct *p, struct rq *rq,
 	u64 runtime;
 
 	new_window = mark_start < window_start;
-	if (!account_busy_for_task_demand(p, event)) {
+	if (!account_busy_for_task_demand(rq, p, event)) {
 		if (new_window)
 			/*
 			 * If the time accounted isn't being accounted as
@@ -2822,7 +2866,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
 	update_window_start(rq, wallclock);
 
 	if (!p->ravg.mark_start) {
-		update_task_cpu_cycles(p, cpu_of(rq));
+		update_task_cpu_cycles(p, cpu_of(rq), wallclock);
 		goto done;
 	}
 
@@ -2890,7 +2934,7 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
 	if (is_idle_task(curr)) {
 		/* We're here without rq->lock held, IRQ disabled */
 		raw_spin_lock(&rq->lock);
-		update_task_cpu_cycles(curr, cpu);
+		update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
 		raw_spin_unlock(&rq->lock);
 	}
 }
@@ -2935,7 +2979,7 @@ void mark_task_starting(struct task_struct *p)
 	p->ravg.mark_start = p->last_wake_ts = wallclock;
 	p->last_cpu_selected_ts = wallclock;
 	p->last_switch_out_ts = 0;
-	update_task_cpu_cycles(p, cpu_of(rq));
+	update_task_cpu_cycles(p, cpu_of(rq), wallclock);
 }
 
 void set_window_start(struct rq *rq)
@@ -3548,7 +3592,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
 			 wallclock, 0);
 
-	update_task_cpu_cycles(p, new_cpu);
+	update_task_cpu_cycles(p, new_cpu, wallclock);
 
 	new_task = is_new_task(p);
 	/* Protected by rq_lock */
@@ -4303,8 +4347,20 @@ void note_task_waking(struct task_struct *p, u64 wallclock)
 {
 	u64 sleep_time = wallclock - p->last_switch_out_ts;
 
-	p->last_wake_ts = wallclock;
+	/*
+	 * When a short burst and short sleeping task goes for a long
+	 * sleep, the task's avg_sleep_time gets boosted. It will not
+	 * come below short_sleep threshold for a lot of time and it
+	 * results in incorrect packing. The idead behind tracking
+	 * avg_sleep_time is to detect if a task is short sleeping
+	 * or not. So limit the sleep time to twice the short sleep
+	 * threshold. For regular long sleeping tasks, the avg_sleep_time
+	 * would be higher than threshold, and packing happens correctly.
+	 */
+	sleep_time = min_t(u64, sleep_time, 2 * sysctl_sched_short_sleep);
 	update_avg(&p->ravg.avg_sleep_time, sleep_time);
+
+	p->last_wake_ts = wallclock;
 }
 
 #ifdef CONFIG_CGROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 276a2387f06f..b88f647ea935 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -397,6 +397,8 @@ struct sched_cluster {
 	unsigned int static_cluster_pwr_cost;
 	int notifier_sent;
 	bool wake_up_idle;
+	atomic64_t last_cc_update;
+	atomic64_t cycles;
 };
 
 extern unsigned long all_cluster_ids[];
@@ -509,7 +511,7 @@ struct cfs_rq {
 
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -1225,6 +1227,11 @@ static inline bool hmp_capable(void)
 	return max_possible_capacity != min_max_possible_capacity;
 }
 
+static inline bool is_max_capacity_cpu(int cpu)
+{
+	return cpu_max_possible_capacity(cpu) == max_possible_capacity;
+}
+
 /*
  * 'load' is in reference to "best cpu" at its best frequency.
  * Scale that in reference to a given cpu, accounting for how bad it is
@@ -1601,6 +1608,8 @@ static inline unsigned int nr_eligible_big_tasks(int cpu)
 	return 0;
 }
 
+static inline bool is_max_capacity_cpu(int cpu) { return true; }
+
 static inline int pct_task_load(struct task_struct *p) { return 0; }
 
 static inline int cpu_capacity(int cpu)
diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c
index 29d8a26a78ed..ba5a326a9fd8 100644
--- a/kernel/sched/sched_avg.c
+++ b/kernel/sched/sched_avg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, 2015-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -26,11 +26,13 @@ static DEFINE_PER_CPU(u64, nr_prod_sum);
 static DEFINE_PER_CPU(u64, last_time);
 static DEFINE_PER_CPU(u64, nr_big_prod_sum);
 static DEFINE_PER_CPU(u64, nr);
+static DEFINE_PER_CPU(u64, nr_max);
 
 static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
 static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
 static s64 last_get_time;
 
+#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
 /**
  * sched_get_nr_running_avg
  * @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -40,7 +42,8 @@ static s64 last_get_time;
  * Obtains the average nr_running value since the last poll.
  * This function may not be called concurrently with itself
  */
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
+void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
+			      unsigned int *max_nr, unsigned int *big_max_nr)
 {
 	int cpu;
 	u64 curr_time = sched_clock();
@@ -50,6 +53,8 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
 	*avg = 0;
 	*iowait_avg = 0;
 	*big_avg = 0;
+	*max_nr = 0;
+	*big_max_nr = 0;
 
 	if (!diff)
 		return;
@@ -78,17 +83,35 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
 		per_cpu(nr_big_prod_sum, cpu) = 0;
 		per_cpu(iowait_prod_sum, cpu) = 0;
 
+		if (*max_nr < per_cpu(nr_max, cpu))
+			*max_nr = per_cpu(nr_max, cpu);
+
+		if (is_max_capacity_cpu(cpu)) {
+			if (*big_max_nr < per_cpu(nr_max, cpu))
+				*big_max_nr = per_cpu(nr_max, cpu);
+		}
+
+		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
 		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
 	}
 
 	diff = curr_time - last_get_time;
 	last_get_time = curr_time;
 
-	*avg = (int)div64_u64(tmp_avg * 100, diff);
-	*big_avg = (int)div64_u64(tmp_big_avg * 100, diff);
-	*iowait_avg = (int)div64_u64(tmp_iowait * 100, diff);
-
-	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg);
+	/*
+	 * Any task running on BIG cluster and BIG tasks running on little
+	 * cluster contributes to big_avg. Small or medium tasks can also
+	 * run on BIG cluster when co-location and scheduler boost features
+	 * are activated. We don't want these tasks to downmigrate to little
+	 * cluster when BIG CPUs are available but isolated. Round up the
+	 * average values so that core_ctl aggressively unisolate BIG CPUs.
+	 */
+	*avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
+	*big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
+	*iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
+
+	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
+				       *max_nr, *big_max_nr);
 
 	BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
 	pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
@@ -121,6 +144,9 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
 
 	BUG_ON((s64)per_cpu(nr, cpu) < 0);
 
+	if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
+		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
+
 	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
 	per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
 	per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 5819ca07a22b..b9b881ebaff3 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -9,7 +9,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o tick-sched.o
-obj-$(CONFIG_TIMER_STATS)			+= timer_stats.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bf7fc4989e5c..e7c2392666cb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,7 +49,6 @@
 #include <linux/sched/deadline.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
-#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -782,34 +781,6 @@ void hrtimers_resume(void)
 	clock_was_set_delayed();
 }
 
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	if (timer->start_site)
-		return;
-	timer->start_site = __builtin_return_address(0);
-	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-	timer->start_pid = current->pid;
-#endif
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-#endif
-}
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-	if (likely(!timer_stats_active))
-		return;
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm, 0);
-#endif
-}
-
 /*
  * Counterpart to lock_hrtimer_base above:
  */
@@ -953,7 +924,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 		 * rare case and less expensive than a smp call.
 		 */
 		debug_deactivate(timer);
-		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
 		if (!restart)
@@ -1012,8 +982,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	timer_stats_hrtimer_set_start_info(timer);
-
 	/* Update pinned state */
 	timer->state &= ~HRTIMER_STATE_PINNED;
 	timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
@@ -1154,12 +1122,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	base = hrtimer_clockid_to_base(clock_id);
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
-
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-	timer->start_pid = -1;
-	memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
 }
 
 /**
@@ -1243,7 +1205,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_write_seqcount_barrier(&cpu_base->seq);
 
 	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
-	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
 	/*
@@ -1631,42 +1592,22 @@ static void init_hrtimers_cpu(int cpu)
 }
 
 #if defined(CONFIG_HOTPLUG_CPU)
-static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
-				 struct hrtimer_cpu_base *new_base,
-				 unsigned int i,
-				 bool wait,
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+				 struct hrtimer_clock_base *new_base,
 				 bool remove_pinned)
 {
 	struct hrtimer *timer;
 	struct timerqueue_node *node;
 	struct timerqueue_head pinned;
 	int is_pinned;
-	struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i];
-	struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i];
+	bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
 
 	timerqueue_init_head(&pinned);
 
-	while ((node = timerqueue_getnext(&old_c_base->active))) {
+	while ((node = timerqueue_getnext(&old_base->active))) {
 		timer = container_of(node, struct hrtimer, node);
-		if (wait) {
-			/* Ensure timers are done running before continuing */
-			while (hrtimer_callback_running(timer)) {
-				raw_spin_unlock(&old_base->lock);
-				raw_spin_unlock(&new_base->lock);
-				cpu_relax();
-				/*
-				 * cpu_relax may just be a barrier. Grant the
-				 * run_hrtimer_list code some time to obtain the
-				 * spinlock.
-				 */
-				udelay(2);
-				raw_spin_lock(&new_base->lock);
-				raw_spin_lock_nested(&old_base->lock,
-							SINGLE_DEPTH_NESTING);
-			}
-		} else {
+		if (is_hotplug)
 			BUG_ON(hrtimer_callback_running(timer));
-		}
 		debug_deactivate(timer);
 
 		/*
@@ -1674,7 +1615,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
 		 * timer could be seen as !active and just vanish away
 		 * under us on another CPU
 		 */
-		__remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0);
+		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
 
 		is_pinned = timer->state & HRTIMER_STATE_PINNED;
 		if (!remove_pinned && is_pinned) {
@@ -1682,7 +1623,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
 			continue;
 		}
 
-		timer->base = new_c_base;
+		timer->base = new_base;
 		/*
 		 * Enqueue the timers on the new cpu. This does not
 		 * reprogram the event device in case the timer
@@ -1691,7 +1632,7 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
 		 * sort out already expired timers and reprogram the
 		 * event device.
 		 */
-		enqueue_hrtimer(timer, new_c_base);
+		enqueue_hrtimer(timer, new_base);
 	}
 
 	/* Re-queue pinned timers for non-hotplug usecase */
@@ -1699,11 +1640,11 @@ static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
 		timer = container_of(node, struct hrtimer, node);
 
 		timerqueue_del(&pinned, &timer->node);
-		enqueue_hrtimer(timer, old_c_base);
+		enqueue_hrtimer(timer, old_base);
 	}
 }
 
-static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
+static void __migrate_hrtimers(int scpu, bool remove_pinned)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
 	unsigned long flags;
@@ -1720,8 +1661,8 @@ static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		migrate_hrtimer_list(old_base, new_base, i, wait,
-								remove_pinned);
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i], remove_pinned);
 	}
 
 	raw_spin_unlock(&old_base->lock);
@@ -1737,12 +1678,12 @@ static void migrate_hrtimers(int scpu)
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
 
-	__migrate_hrtimers(scpu, false, true);
+	__migrate_hrtimers(scpu, true);
 }
 
 void hrtimer_quiesce_cpu(void *cpup)
 {
-	__migrate_hrtimers(*(int *)cpup, true, false);
+	__migrate_hrtimers(*(int *)cpup, false);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..051544aec37c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1250,7 +1250,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
 {
-	unsigned long long now;
+	unsigned long long now = 0;
 
 	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 75ce9cbc313d..90a82deece45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -500,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 	}
 }
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
-{
-	if (timer->start_site)
-		return;
-
-	timer->start_site = addr;
-	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-	timer->start_pid = current->pid;
-}
-
-static void timer_stats_account_timer(struct timer_list *timer)
-{
-	void *site;
-
-	/*
-	 * start_site can be concurrently reset by
-	 * timer_stats_timer_clear_start_info()
-	 */
-	site = READ_ONCE(timer->start_site);
-	if (likely(!site))
-		return;
-
-	timer_stats_update_stats(timer, timer->start_pid, site,
-				 timer->function, timer->start_comm,
-				 timer->flags);
-}
-
-#else
-static void timer_stats_account_timer(struct timer_list *timer) {}
-#endif
-
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 
 static struct debug_obj_descr timer_debug_descr;
@@ -734,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
 	timer->entry.pprev = NULL;
 	timer->flags = flags | raw_smp_processor_id();
 	timer->slack = -1;
-#ifdef CONFIG_TIMER_STATS
-	timer->start_site = NULL;
-	timer->start_pid = -1;
-	memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
@@ -845,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	unsigned long flags;
 	int ret = 0;
 
-	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 
 	base = lock_timer_base(timer, &flags);
@@ -1044,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	struct tvec_base *base;
 	unsigned long flags;
 
-	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
 
 	/*
@@ -1089,7 +1050,6 @@ int del_timer(struct timer_list *timer)
 
 	debug_assert_init(timer);
 
-	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
 		ret = detach_if_pending(timer, base, true);
@@ -1117,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
 
 	base = lock_timer_base(timer, &flags);
 
-	if (base->running_timer != timer) {
-		timer_stats_timer_clear_start_info(timer);
+	if (base->running_timer != timer)
 		ret = detach_if_pending(timer, base, true);
-	}
+
 	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
@@ -1304,8 +1263,6 @@ static inline void __run_timers(struct tvec_base *base)
 			data = timer->data;
 			irqsafe = timer->flags & TIMER_IRQSAFE;
 
-			timer_stats_account_timer(timer);
-
 			base->running_timer = timer;
 			detach_expired_timer(timer, base);
 
@@ -1782,7 +1739,6 @@ static void __init init_timer_cpus(void)
 void __init init_timers(void)
 {
 	init_timer_cpus();
-	init_timer_stats();
 	timer_register_cpu_notifier();
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b288bb3..83aa1f867b97 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -62,21 +62,11 @@ static void
 print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 	    int idx, u64 now)
 {
-#ifdef CONFIG_TIMER_STATS
-	char tmp[TASK_COMM_LEN + 1];
-#endif
 	SEQ_printf(m, " #%d: ", idx);
 	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
 	SEQ_printf(m, ", S:%02x", timer->state);
-#ifdef CONFIG_TIMER_STATS
-	SEQ_printf(m, ", ");
-	print_name_offset(m, timer->start_site);
-	memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
-	tmp[TASK_COMM_LEN] = 0;
-	SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
-#endif
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
 		(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
deleted file mode 100644
index 1adecb4b87c8..000000000000
--- a/kernel/time/timer_stats.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * kernel/time/timer_stats.c
- *
- * Collect timer usage statistics.
- *
- * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- * timer_stats is based on timer_top, a similar functionality which was part of
- * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
- * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
- * on dynamic allocation of the statistics entries and linear search based
- * lookup combined with a global lock, rather than the static array, hash
- * and per-CPU locking which is used by timer_stats. It was written for the
- * pre hrtimer kernel code and therefore did not take hrtimers into account.
- * Nevertheless it provided the base for the timer_stats implementation and
- * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
- * for this effort.
- *
- * timer_top.c is
- *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
- *	Written by Daniel Petrini <d.pensator@gmail.com>
- *	timer_top.c was released under the GNU General Public License version 2
- *
- * We export the addresses and counting of timer functions being called,
- * the pid and cmdline from the owner process if applicable.
- *
- * Start/stop data collection:
- * # echo [1|0] >/proc/timer_stats
- *
- * Display the information collected so far:
- * # cat /proc/timer_stats
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
-
-/*
- * This is our basic unit of interest: a timer expiry event identified
- * by the timer, its start/expire functions and the PID of the task that
- * started the timer. We count the number of times an event happens:
- */
-struct entry {
-	/*
-	 * Hash list:
-	 */
-	struct entry		*next;
-
-	/*
-	 * Hash keys:
-	 */
-	void			*timer;
-	void			*start_func;
-	void			*expire_func;
-	pid_t			pid;
-
-	/*
-	 * Number of timeout events:
-	 */
-	unsigned long		count;
-	u32			flags;
-
-	/*
-	 * We save the command-line string to preserve
-	 * this information past task exit:
-	 */
-	char			comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_RAW_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-int __read_mostly timer_stats_active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS	10
-#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry)						\
-	(((unsigned long)(entry)->timer       ^				\
-	  (unsigned long)(entry)->start_func  ^				\
-	  (unsigned long)(entry)->expire_func ^				\
-	  (unsigned long)(entry)->pid		) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
-
-static void reset_entries(void)
-{
-	nr_entries = 0;
-	memset(entries, 0, sizeof(entries));
-	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
-	atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
-	if (nr_entries >= MAX_ENTRIES)
-		return NULL;
-
-	return entries + nr_entries++;
-}
-
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
-	return entry1->timer       == entry2->timer	  &&
-	       entry1->start_func  == entry2->start_func  &&
-	       entry1->expire_func == entry2->expire_func &&
-	       entry1->pid	   == entry2->pid;
-}
-
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
-{
-	struct entry **head, *curr, *prev;
-
-	head = tstat_hashentry(entry);
-	curr = *head;
-
-	/*
-	 * The fastpath is when the entry is already hashed,
-	 * we do this with the lookup lock held, but with the
-	 * table lock not held:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			return curr;
-
-		curr = curr->next;
-	}
-	/*
-	 * Slowpath: allocate, set up and link a new hash entry:
-	 */
-	prev = NULL;
-	curr = *head;
-
-	raw_spin_lock(&table_lock);
-	/*
-	 * Make sure we have not raced with another CPU:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			goto out_unlock;
-
-		prev = curr;
-		curr = curr->next;
-	}
-
-	curr = alloc_entry();
-	if (curr) {
-		*curr = *entry;
-		curr->count = 0;
-		curr->next = NULL;
-		memcpy(curr->comm, comm, TASK_COMM_LEN);
-
-		smp_mb(); /* Ensure that curr is initialized before insert */
-
-		if (prev)
-			prev->next = curr;
-		else
-			*head = curr;
-	}
- out_unlock:
-	raw_spin_unlock(&table_lock);
-
-	return curr;
-}
-
-/**
- * timer_stats_update_stats - Update the statistics for a timer.
- * @timer:	pointer to either a timer_list or a hrtimer
- * @pid:	the pid of the task which set up the timer
- * @startf:	pointer to the function which did the timer setup
- * @timerf:	pointer to the timer callback function of the timer
- * @comm:	name of the process which set up the timer
- * @tflags:	The flags field of the timer
- *
- * When the timer is already registered, then the event counter is
- * incremented. Otherwise the timer is registered in a free slot.
- */
-void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-			      void *timerf, char *comm, u32 tflags)
-{
-	/*
-	 * It doesn't matter which lock we take:
-	 */
-	raw_spinlock_t *lock;
-	struct entry *entry, input;
-	unsigned long flags;
-
-	if (likely(!timer_stats_active))
-		return;
-
-	lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
-
-	input.timer = timer;
-	input.start_func = startf;
-	input.expire_func = timerf;
-	input.pid = pid;
-	input.flags = tflags;
-
-	raw_spin_lock_irqsave(lock, flags);
-	if (!timer_stats_active)
-		goto out_unlock;
-
-	entry = tstat_lookup(&input, comm);
-	if (likely(entry))
-		entry->count++;
-	else
-		atomic_inc(&overflow_count);
-
- out_unlock:
-	raw_spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
-	char symname[KSYM_NAME_LEN];
-
-	if (lookup_symbol_name(addr, symname) < 0)
-		seq_printf(m, "<%p>", (void *)addr);
-	else
-		seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
-	struct timespec period;
-	struct entry *entry;
-	unsigned long ms;
-	long events = 0;
-	ktime_t time;
-	int i;
-
-	mutex_lock(&show_mutex);
-	/*
-	 * If still active then calculate up to now:
-	 */
-	if (timer_stats_active)
-		time_stop = ktime_get();
-
-	time = ktime_sub(time_stop, time_start);
-
-	period = ktime_to_timespec(time);
-	ms = period.tv_nsec / 1000000;
-
-	seq_puts(m, "Timer Stats Version: v0.3\n");
-	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
-	if (atomic_read(&overflow_count))
-		seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
-	seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");
-
-	for (i = 0; i < nr_entries; i++) {
-		entry = entries + i;
-		if (entry->flags & TIMER_DEFERRABLE) {
-			seq_printf(m, "%4luD, %5d %-16s ",
-				entry->count, entry->pid, entry->comm);
-		} else {
-			seq_printf(m, " %4lu, %5d %-16s ",
-				entry->count, entry->pid, entry->comm);
-		}
-
-		print_name_offset(m, (unsigned long)entry->start_func);
-		seq_puts(m, " (");
-		print_name_offset(m, (unsigned long)entry->expire_func);
-		seq_puts(m, ")\n");
-
-		events += entry->count;
-	}
-
-	ms += period.tv_sec * 1000;
-	if (!ms)
-		ms = 1;
-
-	if (events && period.tv_sec)
-		seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
-			   events, events * 1000 / ms,
-			   (events * 1000000 / ms) % 1000);
-	else
-		seq_printf(m, "%ld total events\n", events);
-
-	mutex_unlock(&show_mutex);
-
-	return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
-	unsigned long flags;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
-
-		raw_spin_lock_irqsave(lock, flags);
-		/* nothing */
-		raw_spin_unlock_irqrestore(lock, flags);
-	}
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *offs)
-{
-	char ctl[2];
-
-	if (count != 2 || *offs)
-		return -EINVAL;
-
-	if (copy_from_user(ctl, buf, count))
-		return -EFAULT;
-
-	mutex_lock(&show_mutex);
-	switch (ctl[0]) {
-	case '0':
-		if (timer_stats_active) {
-			timer_stats_active = 0;
-			time_stop = ktime_get();
-			sync_access();
-		}
-		break;
-	case '1':
-		if (!timer_stats_active) {
-			reset_entries();
-			time_start = ktime_get();
-			smp_mb();
-			timer_stats_active = 1;
-		}
-		break;
-	default:
-		count = -EINVAL;
-	}
-	mutex_unlock(&show_mutex);
-
-	return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, tstats_show, NULL);
-}
-
-static const struct file_operations tstats_fops = {
-	.open		= tstats_open,
-	.read		= seq_read,
-	.write		= tstats_write,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void __init init_timer_stats(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
-	struct proc_dir_entry *pe;
-
-	pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
-	if (!pe)
-		return -ENOMEM;
-	return 0;
-}
-__initcall(init_tstats_procfs);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c9956440d0e6..12ea4ea619ee 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1471,6 +1471,11 @@ static __init int kprobe_trace_self_tests_init(void)
 
 end:
 	release_all_trace_kprobes();
+	/*
+	 * Wait for the optimizer work to finish. Otherwise it might fiddle
+	 * with probes in already freed __init text.
+	 */
+	wait_for_kprobe_optimizer();
 	if (warn)
 		pr_cont("NG: Some tests are failed. Please check them.\n");
 	else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c0ab232e3abd..73c018d7df00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1508,8 +1508,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 		return;
 	}
 
-	timer_stats_timer_set_start_info(&dwork->timer);
-
 	dwork->wq = wq;
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;