| field | value | date |
|---|---|---|
| author | Linux Build Service Account <lnxbuild@localhost> | 2016-09-29 11:20:17 -0700 |
| committer | Gerrit - the friendly Code Review server <code-review@localhost> | 2016-09-29 11:20:17 -0700 |
| commit | bbf8724641bd78107e8a641e6a87d89241f7b4ee | |
| tree | e3625abd853f3e16b704222b170280920d5e6b15 | |
| parent | a18b94fe05a311bf084cb235c0904c48eb04149f | |
| parent | 59f16ae0345c902c1d09da75e0f89d7e7ddbc54f | |
Merge "core_ctrl: Move core control into kernel"
42 files changed, 1724 insertions, 248 deletions
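The drivers/base/cpu.c hunk in the diff below adds a per-CPU `isolate` sysfs attribute whose store handler calls sched_isolate_cpu()/sched_unisolate_cpu(). A minimal userspace sketch of toggling that attribute follows; it assumes the node surfaces at /sys/devices/system/cpu/cpuN/isolate (the usual location for cpu device attributes), and the helper name is purely illustrative, not part of this change.

```c
/* Illustrative only: toggle the per-CPU "isolate" attribute added by this
 * change. Assumes it is exposed as /sys/devices/system/cpu/cpu<N>/isolate
 * and that the caller has permission to write it (typically root). */
#include <stdio.h>
#include <errno.h>

static int set_cpu_isolated(unsigned int cpu, int isolated)
{
	char path[64];
	FILE *fp;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%u/isolate", cpu);

	fp = fopen(path, "w");
	if (!fp)
		return -errno;

	/* The store handler accepts only 0 or 1 and rejects anything else. */
	fprintf(fp, "%d\n", isolated ? 1 : 0);
	fclose(fp);
	return 0;
}

int main(void)
{
	/* Example: isolate CPU3, then bring it back. */
	if (set_cpu_isolated(3, 1))
		perror("isolate cpu3");
	if (set_cpu_isolated(3, 0))
		perror("unisolate cpu3");
	return 0;
}
```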
diff --git a/arch/arm/configs/msmcortex_defconfig b/arch/arm/configs/msmcortex_defconfig index 0a20c52bd3b2..2b35b0f12787 100644 --- a/arch/arm/configs/msmcortex_defconfig +++ b/arch/arm/configs/msmcortex_defconfig @@ -459,7 +459,6 @@ CONFIG_TRACER_PKT=y CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MEM_SHARE_QMI_SERVICE=y diff --git a/arch/arm/configs/msmfalcon_defconfig b/arch/arm/configs/msmfalcon_defconfig index 0a20c52bd3b2..2b35b0f12787 100644 --- a/arch/arm/configs/msmfalcon_defconfig +++ b/arch/arm/configs/msmfalcon_defconfig @@ -459,7 +459,6 @@ CONFIG_TRACER_PKT=y CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MEM_SHARE_QMI_SERVICE=y diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 1d45320ee125..f56a831de043 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -37,6 +37,7 @@ #include <linux/kallsyms.h> #include <linux/proc_fs.h> #include <linux/export.h> +#include <linux/cpumask.h> #include <asm/hardware/cache-l2x0.h> #include <asm/hardware/cache-uniphier.h> @@ -127,6 +128,7 @@ static bool migrate_one_irq(struct irq_desc *desc) const struct cpumask *affinity = irq_data_get_affinity_mask(d); struct irq_chip *c; bool ret = false; + struct cpumask available_cpus; /* * If this is a per-CPU interrupt, or the affinity does not @@ -135,8 +137,15 @@ static bool migrate_one_irq(struct irq_desc *desc) if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) return false; + cpumask_copy(&available_cpus, affinity); + cpumask_andnot(&available_cpus, &available_cpus, cpu_isolated_mask); + affinity = &available_cpus; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; + cpumask_andnot(&available_cpus, cpu_online_mask, + cpu_isolated_mask); + if (cpumask_empty(affinity)) + affinity = cpu_online_mask; ret = true; } diff --git a/arch/arm64/configs/msm-perf_defconfig b/arch/arm64/configs/msm-perf_defconfig index 84bb603c3142..7f31331933bb 100644 --- a/arch/arm64/configs/msm-perf_defconfig +++ b/arch/arm64/configs/msm-perf_defconfig @@ -525,7 +525,6 @@ CONFIG_MSM_PIL_MSS_QDSP6V5=y CONFIG_TRACER_PKT=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y CONFIG_MSM_RPM_LOG=y CONFIG_MSM_RPM_STATS_LOG=y diff --git a/arch/arm64/configs/msm_defconfig b/arch/arm64/configs/msm_defconfig index 6119ff12d46d..89bee1463421 100644 --- a/arch/arm64/configs/msm_defconfig +++ b/arch/arm64/configs/msm_defconfig @@ -527,7 +527,6 @@ CONFIG_TRACER_PKT=y CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y diff --git a/arch/arm64/configs/msmcortex-perf_defconfig b/arch/arm64/configs/msmcortex-perf_defconfig index 08288e1b5c25..7667669e8aa4 100644 --- a/arch/arm64/configs/msmcortex-perf_defconfig +++ b/arch/arm64/configs/msmcortex-perf_defconfig @@ -525,7 +525,6 @@ CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig 
index 9e2727c4fe1e..c267345858cf 100644 --- a/arch/arm64/configs/msmcortex_defconfig +++ b/arch/arm64/configs/msmcortex_defconfig @@ -544,7 +544,6 @@ CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y diff --git a/arch/arm64/configs/msmfalcon-perf_defconfig b/arch/arm64/configs/msmfalcon-perf_defconfig index 39c2d3f71c5a..6ebd60b43c71 100644 --- a/arch/arm64/configs/msmfalcon-perf_defconfig +++ b/arch/arm64/configs/msmfalcon-perf_defconfig @@ -522,7 +522,6 @@ CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y diff --git a/arch/arm64/configs/msmfalcon_defconfig b/arch/arm64/configs/msmfalcon_defconfig index a277038b3fc3..01324d89e79e 100644 --- a/arch/arm64/configs/msmfalcon_defconfig +++ b/arch/arm64/configs/msmfalcon_defconfig @@ -541,7 +541,6 @@ CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y CONFIG_MSM_MPM_OF=y CONFIG_MSM_EVENT_TIMER=y CONFIG_MSM_AVTIMER=y -CONFIG_MSM_CORE_CTL_HELPER=y CONFIG_QCOM_REMOTEQDSS=y CONFIG_MSM_SERVICE_NOTIFIER=y CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG=y diff --git a/drivers/base/core.c b/drivers/base/core.c index b7d56c5ea3c6..3ac683dff7de 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -72,6 +72,11 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } +void lock_device_hotplug_assert(void) +{ + lockdep_assert_held(&device_hotplug_lock); +} + #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dee022638fe6..c8bfb6077224 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -180,6 +180,58 @@ static struct attribute_group crash_note_cpu_attr_group = { }; #endif +#ifdef CONFIG_HOTPLUG_CPU + +static ssize_t show_cpu_isolated(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + ssize_t rc; + int cpuid = cpu->dev.id; + unsigned int isolated = cpu_isolated(cpuid); + + rc = snprintf(buf, PAGE_SIZE-2, "%d\n", isolated); + + return rc; +} + +static ssize_t __ref store_cpu_isolated(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + int err; + int cpuid = cpu->dev.id; + unsigned int isolated; + + err = kstrtouint(strstrip((char *)buf), 0, &isolated); + if (err) + return err; + + if (isolated > 1) + return -EINVAL; + + if (isolated) + sched_isolate_cpu(cpuid); + else + sched_unisolate_cpu(cpuid); + + return count; +} + +static DEVICE_ATTR(isolate, 0644, show_cpu_isolated, store_cpu_isolated); + +static struct attribute *cpu_isolated_attrs[] = { + &dev_attr_isolate.attr, + NULL +}; + +static struct attribute_group cpu_isolated_attr_group = { + .attrs = cpu_isolated_attrs, +}; + +#endif + #ifdef CONFIG_SCHED_HMP static ssize_t show_sched_static_cpu_pwr_cost(struct device *dev, @@ -280,6 +332,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = { #ifdef CONFIG_SCHED_HMP &sched_hmp_cpu_attr_group, #endif +#ifdef CONFIG_HOTPLUG_CPU + &cpu_isolated_attr_group, +#endif NULL }; @@ -290,6 +345,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = { #ifdef CONFIG_SCHED_HMP &sched_hmp_cpu_attr_group, #endif +#ifdef 
CONFIG_HOTPLUG_CPU + &cpu_isolated_attr_group, +#endif NULL }; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index b622b9541279..a045b9a940e8 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -625,12 +625,15 @@ static void smp_callback(void *v) static int cpuidle_latency_notify(struct notifier_block *b, unsigned long l, void *v) { - const struct cpumask *cpus; + struct cpumask cpus; - cpus = v ?: cpu_online_mask; + if (v) + cpumask_andnot(&cpus, v, cpu_isolated_mask); + else + cpumask_andnot(&cpus, cpu_online_mask, cpu_isolated_mask); preempt_disable(); - smp_call_function_many(cpus, smp_callback, NULL, 1); + smp_call_function_many(&cpus, smp_callback, NULL, 1); preempt_enable(); return NOTIFY_OK; diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 9c27344165be..c32eafbd38fd 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -716,14 +716,6 @@ config MSM_KERNEL_PROTECT_TEST read-only. This test is FATAL whether it passes or fails! Success is signaled by a stage-2 fault. -config MSM_CORE_CTL_HELPER - tristate "Core control helper functions for dynamically hotplug CPUs" - help - Provide helper functions for core control driver. Core control - driver dynamically hotplugs CPUs from kernel based on current - system load and state. It also supports limiting min and - max online CPUs from userspace. - config QCOM_REMOTEQDSS bool "Allow debug tools to enable events on other processors" depends on QCOM_SCM && DEBUG_FS diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index d9134a558be6..434a114c000f 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_MEM_SHARE_QMI_SERVICE) += memshare/ obj-$(CONFIG_MSM_PIL_SSR_GENERIC) += subsys-pil-tz.o obj-$(CONFIG_MSM_PIL_MSS_QDSP6V5) += pil-q6v5.o pil-msa.o pil-q6v5-mss.o obj-$(CONFIG_MSM_PIL) += peripheral-loader.o -obj-$(CONFIG_MSM_CORE_CTL_HELPER) += core_ctl_helper.o obj-$(CONFIG_MSM_PFE_WA) += pfe-wa.o obj-$(CONFIG_ARCH_MSM8996) += msm_cpu_voltage.o diff --git a/drivers/soc/qcom/core_ctl_helper.c b/drivers/soc/qcom/core_ctl_helper.c deleted file mode 100644 index 88201412128e..000000000000 --- a/drivers/soc/qcom/core_ctl_helper.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include <linux/cpu.h> -#include <linux/cpufreq.h> -#include <linux/ktime.h> -#include <linux/hrtimer.h> -#include <linux/module.h> -#include <linux/init.h> -#include <trace/events/power.h> -#include <soc/qcom/core_ctl.h> - -void core_ctl_trace(int type, int cpu, int arg1, int arg2, int arg3) -{ - switch (type) { - case CORE_CTL_EVAL_NEED: - trace_core_ctl_eval_need(cpu, arg1, arg2, arg3); - break; - - case CORE_CTL_SET_BUSY: - trace_core_ctl_set_busy(cpu, arg1, arg2, arg3); - break; - }; -} -EXPORT_SYMBOL(core_ctl_trace); - -void core_ctl_block_hotplug(void) -{ - get_online_cpus(); -} -EXPORT_SYMBOL(core_ctl_block_hotplug); - -void core_ctl_unblock_hotplug(void) -{ - put_online_cpus(); -} -EXPORT_SYMBOL(core_ctl_unblock_hotplug); - -s64 core_ctl_get_time(void) -{ - return ktime_to_ms(ktime_get()); -} -EXPORT_SYMBOL(core_ctl_get_time); - -struct cpufreq_policy *core_ctl_get_policy(int cpu) -{ - return cpufreq_cpu_get(cpu); -} -EXPORT_SYMBOL(core_ctl_get_policy); - -void core_ctl_put_policy(struct cpufreq_policy *policy) -{ - cpufreq_cpu_put(policy); -} -EXPORT_SYMBOL(core_ctl_put_policy); - -struct device *core_ctl_find_cpu_device(unsigned cpu) -{ - return get_cpu_device(cpu); -} -EXPORT_SYMBOL(core_ctl_find_cpu_device); - -int __ref core_ctl_online_core(unsigned int cpu) -{ - int ret = -EINVAL; - struct device *dev = get_cpu_device(cpu); - - if (dev) { - lock_device_hotplug(); - ret = device_online(dev); - unlock_device_hotplug(); - } - return ret; -} -EXPORT_SYMBOL(core_ctl_online_core); - -int __ref core_ctl_offline_core(unsigned int cpu) -{ - int ret = -EINVAL; - struct device *dev = get_cpu_device(cpu); - - if (dev) { - lock_device_hotplug(); - ret = device_offline(dev); - unlock_device_hotplug(); - } - return ret; -} -EXPORT_SYMBOL(core_ctl_offline_core); diff --git a/drivers/soc/qcom/watchdog_v2.c b/drivers/soc/qcom/watchdog_v2.c index aa20705b9adc..8f58eaa537b1 100644 --- a/drivers/soc/qcom/watchdog_v2.c +++ b/drivers/soc/qcom/watchdog_v2.c @@ -360,7 +360,7 @@ static void ping_other_cpus(struct msm_watchdog_data *wdog_dd) cpumask_clear(&wdog_dd->alive_mask); smp_mb(); for_each_cpu(cpu, cpu_online_mask) { - if (!cpu_idle_pc_state[cpu]) + if (!cpu_idle_pc_state[cpu] && !cpu_isolated(cpu)) smp_call_function_single(cpu, keep_alive_response, wdog_dd, 1); } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 59915ea5373c..0eab4811ee92 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -53,6 +53,7 @@ extern int nr_cpu_ids; * cpu_present_mask - has bit 'cpu' set iff cpu is populated * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration + * cpu_isolated_mask- has bit 'cpu' set iff cpu isolated * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. 
* @@ -89,25 +90,30 @@ extern const struct cpumask *const cpu_possible_mask; extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; +extern const struct cpumask *const cpu_isolated_mask; #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) #define num_possible_cpus() cpumask_weight(cpu_possible_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) +#define num_isolated_cpus() cpumask_weight(cpu_isolated_mask) #define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) #define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) #define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) +#define cpu_isolated(cpu) cpumask_test_cpu((cpu), cpu_isolated_mask) #else #define num_online_cpus() 1U #define num_possible_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U +#define num_isolated_cpus() 0U #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) #define cpu_active(cpu) ((cpu) == 0) +#define cpu_isolated(cpu) ((cpu) == 0) #endif /* verify cpu argument to cpumask_* operators */ @@ -714,12 +720,14 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) +#define for_each_isolated_cpu(cpu) for_each_cpu((cpu), cpu_isolated_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ void set_cpu_possible(unsigned int cpu, bool possible); void set_cpu_present(unsigned int cpu, bool present); void set_cpu_online(unsigned int cpu, bool online); void set_cpu_active(unsigned int cpu, bool active); +void set_cpu_isolated(unsigned int cpu, bool isolated); void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); diff --git a/include/linux/device.h b/include/linux/device.h index 9f27351c6b9c..4b4e2d5ce6e7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1023,6 +1023,7 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); +extern void lock_device_hotplug_assert(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2ead22dd74a0..952adcacc4cf 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -53,6 +53,7 @@ enum hrtimer_restart { * * 0x00 inactive * 0x01 enqueued into rbtree + * 0x02 timer is pinned to a cpu * * The callback state is not part of the timer->state because clearing it would * mean touching the timer after the callback, this makes it impossible to free @@ -72,6 +73,8 @@ enum hrtimer_restart { */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 +#define HRTIMER_PINNED_SHIFT 1 +#define HRTIMER_STATE_PINNED (1 << HRTIMER_PINNED_SHIFT) /** * struct hrtimer - the basic hrtimer structure @@ -357,6 +360,9 @@ DECLARE_PER_CPU(struct tick_device, 
tick_cpu_device); /* Exported timer functions: */ +/* To be used from cpusets, only */ +extern void hrtimer_quiesce_cpu(void *cpup); + /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); diff --git a/include/linux/sched.h b/include/linux/sched.h index 4701e0403167..a395d8a9ff73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -363,6 +363,41 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_isolate_count(const cpumask_t *mask, bool include_offline); +extern int sched_isolate_cpu(int cpu); +extern int sched_unisolate_cpu(int cpu); +extern int sched_unisolate_cpu_unlocked(int cpu); +#else +static inline int sched_isolate_count(const cpumask_t *mask, + bool include_offline) +{ + cpumask_t count_mask; + + if (include_offline) + cpumask_andnot(&count_mask, mask, cpu_online_mask); + else + return 0; + + return cpumask_weight(&count_mask); +} + +static inline int sched_isolate_cpu(int cpu) +{ + return 0; +} + +static inline int sched_unisolate_cpu(int cpu) +{ + return 0; +} + +static inline int sched_unisolate_cpu_unlocked(int cpu) +{ + return 0; +} +#endif + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); @@ -409,6 +444,8 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, extern unsigned int softlockup_panic; extern unsigned int hardlockup_panic; void lockup_detector_init(void); +extern void watchdog_enable(unsigned int cpu); +extern void watchdog_disable(unsigned int cpu); #else static inline void touch_softlockup_watchdog_sched(void) { @@ -425,6 +462,12 @@ static inline void touch_all_softlockup_watchdogs(void) static inline void lockup_detector_init(void) { } +static inline void watchdog_enable(unsigned int cpu) +{ +} +static inline void watchdog_disable(unsigned int cpu) +{ +} #endif #ifdef CONFIG_DETECT_HUNG_TASK diff --git a/include/linux/tick.h b/include/linux/tick.h index 5bf3ddade19c..1732697ea419 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -161,7 +161,15 @@ extern void __tick_nohz_task_switch(void); #else static inline int housekeeping_any_cpu(void) { - return smp_processor_id(); + cpumask_t available; + int cpu; + + cpumask_andnot(&available, cpu_online_mask, cpu_isolated_mask); + cpu = cpumask_any(&available); + if (cpu >= nr_cpu_ids) + cpu = smp_processor_id(); + + return cpu; } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } @@ -187,7 +195,7 @@ static inline bool is_housekeeping_cpu(int cpu) if (tick_nohz_full_enabled()) return cpumask_test_cpu(cpu, housekeeping_mask); #endif - return true; + return !cpu_isolated(cpu); } static inline void housekeeping_affine(struct task_struct *t) diff --git a/include/linux/timer.h b/include/linux/timer.h index 7a5602e19e87..b1617e8932b2 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -182,6 +182,9 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) +/* To be used from cpusets, only */ +extern void timer_quiesce_cpu(void *cpup); + /* * Timer-statistics info: */ diff --git a/include/soc/qcom/core_ctl.h b/include/soc/qcom/core_ctl.h deleted file mode 100644 index 08b43058b37c..000000000000 --- a/include/soc/qcom/core_ctl.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2014-2015, The 
Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __SOC_QCOM_CORE_CTL_H -#define __SOC_QCOM_CORE_CTL_H - -enum { - CORE_CTL_EVAL_NEED, - CORE_CTL_SET_BUSY, - CORE_CTL_N_TRACE_EVENTS, -}; - -extern void core_ctl_block_hotplug(void); -extern void core_ctl_unblock_hotplug(void); -extern s64 core_ctl_get_time(void); -extern struct cpufreq_policy *core_ctl_get_policy(int cpu); -extern void core_ctl_put_policy(struct cpufreq_policy *policy); -extern struct device *core_ctl_find_cpu_device(unsigned cpu); -extern int core_ctl_online_core(unsigned int cpu); -extern int core_ctl_offline_core(unsigned int cpu); - -#define USE_CORE_CTL_TRACE -extern void core_ctl_trace(int type, int cpu, int arg1, int arg2, int arg3); - -#endif diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 1ef5ec3eaf70..3990efdd0cc0 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1244,6 +1244,41 @@ TRACE_EVENT(sched_get_nr_running_avg, __entry->avg, __entry->big_avg, __entry->iowait_avg) ); +/** + * sched_isolate - called when cores are isolated/unisolated + * + * @acutal_mask: mask of cores actually isolated/unisolated + * @req_mask: mask of cores requested isolated/unisolated + * @online_mask: cpu online mask + * @time: amount of time in us it took to isolate/unisolate + * @isolate: 1 if isolating, 0 if unisolating + * + */ +TRACE_EVENT(sched_isolate, + + TP_PROTO(unsigned int requested_cpu, unsigned int isolated_cpus, + u64 start_time, unsigned char isolate), + + TP_ARGS(requested_cpu, isolated_cpus, start_time, isolate), + + TP_STRUCT__entry( + __field(u32, requested_cpu) + __field(u32, isolated_cpus) + __field(u32, time) + __field(unsigned char, isolate) + ), + + TP_fast_assign( + __entry->requested_cpu = requested_cpu; + __entry->isolated_cpus = isolated_cpus; + __entry->time = div64_u64(sched_clock() - start_time, 1000); + __entry->isolate = isolate; + ), + + TP_printk("iso cpu=%u cpus=0x%x time=%u us isolated=%d", + __entry->requested_cpu, __entry->isolated_cpus, + __entry->time, __entry->isolate) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/init/Kconfig b/init/Kconfig index 9ad1ae9d9da8..6020a351c57b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1170,6 +1170,16 @@ config SCHED_HMP_CSTATE_AWARE with CPUs C-state. If this is enabled, scheduler places tasks onto the shallowest C-state CPU among the most power efficient CPUs. +config SCHED_CORE_CTL + bool "QTI Core Control" + depends on SMP + help + This options enables the core control functionality in + the scheduler. Core control automatically offline and + online cores based on cpu load and utilization. + + If unsure, say N here. 
+ config CHECKPOINT_RESTORE bool "Checkpoint/restore support" if EXPERT select PROC_CHILDREN diff --git a/kernel/cpu.c b/kernel/cpu.c index 1cfd381642da..3c97f5b88a07 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -768,6 +768,10 @@ static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); EXPORT_SYMBOL(cpu_active_mask); +static DECLARE_BITMAP(cpu_isolated_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_isolated_mask = to_cpumask(cpu_isolated_bits); +EXPORT_SYMBOL(cpu_isolated_mask); + void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) @@ -802,6 +806,14 @@ void set_cpu_active(unsigned int cpu, bool active) cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits)); } +void set_cpu_isolated(unsigned int cpu, bool isolated) +{ + if (isolated) + cpumask_set_cpu(cpu, to_cpumask(cpu_isolated_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_isolated_bits)); +} + void init_cpu_present(const struct cpumask *src) { cpumask_copy(to_cpumask(cpu_present_bits), src); @@ -817,6 +829,11 @@ void init_cpu_online(const struct cpumask *src) cpumask_copy(to_cpumask(cpu_online_bits), src); } +void init_cpu_isolated(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_isolated_bits), src); +} + static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) diff --git a/kernel/events/core.c b/kernel/events/core.c index d6ec580584b6..5beb88f11671 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3386,7 +3386,8 @@ static int perf_event_read(struct perf_event *event, bool group) * If event is enabled and currently active on a CPU, update the * value in the event structure: */ - if (event->state == PERF_EVENT_STATE_ACTIVE) { + if (event->state == PERF_EVENT_STATE_ACTIVE && + !cpu_isolated(event->oncpu)) { struct perf_read_data data = { .event = event, .group = group, diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 011f8c4c63da..104432f3d311 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/ratelimit.h> #include <linux/irq.h> +#include <linux/cpumask.h> #include "internals.h" @@ -20,6 +21,7 @@ static bool migrate_one_irq(struct irq_desc *desc) const struct cpumask *affinity = d->common->affinity; struct irq_chip *c; bool ret = false; + struct cpumask available_cpus; /* * If this is a per-CPU interrupt, or the affinity does not @@ -29,8 +31,15 @@ static bool migrate_one_irq(struct irq_desc *desc) !cpumask_test_cpu(smp_processor_id(), affinity)) return false; + cpumask_copy(&available_cpus, affinity); + cpumask_andnot(&available_cpus, &available_cpus, cpu_isolated_mask); + affinity = &available_cpus; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; + cpumask_andnot(&available_cpus, cpu_online_mask, + cpu_isolated_mask); + if (cpumask_empty(affinity)) + affinity = cpu_online_mask; ret = true; } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 8ecc7b3f7dd9..69c32c42080f 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -45,6 +45,7 @@ #include <linux/seq_file.h> #include <linux/irq.h> #include <linux/irqdesc.h> +#include <linux/cpumask.h> #include <linux/uaccess.h> #include <linux/export.h> @@ -447,6 +448,9 @@ EXPORT_SYMBOL_GPL(pm_qos_request); int pm_qos_request_for_cpu(int pm_qos_class, int cpu) { + if (cpu_isolated(cpu)) + return INT_MAX; + return 
pm_qos_array[pm_qos_class]->constraints->target_per_cpu[cpu]; } EXPORT_SYMBOL(pm_qos_request_for_cpu); @@ -469,6 +473,9 @@ int pm_qos_request_for_cpumask(int pm_qos_class, struct cpumask *mask) val = c->default_value; for_each_cpu(cpu, mask) { + if (cpu_isolated(cpu)) + continue; + switch (c->type) { case PM_QOS_MIN: if (c->target_per_cpu[cpu] < val) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 1f159743ebfc..508b65690288 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o +obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7474463b9835..7b7f1961fd10 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -74,6 +74,7 @@ #include <linux/binfmts.h> #include <linux/context_tracking.h> #include <linux/compiler.h> +#include <linux/irq.h> #include <asm/switch_to.h> #include <asm/tlb.h> @@ -1229,6 +1230,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, struct rq *rq; unsigned int dest_cpu; int ret = 0; + cpumask_t allowed_mask; rq = task_rq_lock(p, &flags); @@ -1244,16 +1246,22 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (cpumask_equal(&p->cpus_allowed, new_mask)) goto out; - dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); + cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask); + + dest_cpu = cpumask_any_and(cpu_active_mask, &allowed_mask); if (dest_cpu >= nr_cpu_ids) { - ret = -EINVAL; - goto out; + dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); + if (dest_cpu >= nr_cpu_ids) { + ret = -EINVAL; + goto out; + } + cpumask_copy(&allowed_mask, new_mask); } do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? 
If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpumask_test_cpu(task_cpu(p), &allowed_mask)) goto out; if (task_running(rq, p) || p->state == TASK_WAKING) { @@ -1577,12 +1585,13 @@ EXPORT_SYMBOL_GPL(kick_process); /* * ->cpus_allowed is protected by both rq->lock and p->pi_lock */ -static int select_fallback_rq(int cpu, struct task_struct *p) +static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso) { int nid = cpu_to_node(cpu); const struct cpumask *nodemask = NULL; enum { cpuset, possible, fail } state = cpuset; int dest_cpu; + int isolated_candidate = -1; /* * If the node that the cpu is on has been offlined, cpu_to_node() @@ -1598,6 +1607,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p) continue; if (!cpu_active(dest_cpu)) continue; + if (cpu_isolated(dest_cpu)) + continue; if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; } @@ -1610,6 +1621,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p) continue; if (!cpu_active(dest_cpu)) continue; + if (cpu_isolated(dest_cpu)) { + if (allow_iso) + isolated_candidate = dest_cpu; + continue; + } + goto out; + } + + if (isolated_candidate != -1) { + dest_cpu = isolated_candidate; goto out; } @@ -1655,6 +1676,8 @@ out: static inline int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { + bool allow_isolated = (p->flags & PF_KTHREAD); + lockdep_assert_held(&p->pi_lock); if (p->nr_cpus_allowed > 1) @@ -1671,8 +1694,9 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * not worry about this generic constraint ] */ if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || - !cpu_online(cpu))) - cpu = select_fallback_rq(task_cpu(p), p); + !cpu_online(cpu)) || + (cpu_isolated(cpu) && !allow_isolated)) + cpu = select_fallback_rq(task_cpu(p), p, allow_isolated); return cpu; } @@ -2956,7 +2980,7 @@ void sched_exec(void) if (dest_cpu == smp_processor_id()) goto unlock; - if (likely(cpu_active(dest_cpu))) { + if (likely(cpu_active(dest_cpu) && likely(!cpu_isolated(dest_cpu)))) { struct migration_arg arg = { p, dest_cpu }; raw_spin_unlock_irqrestore(&p->pi_lock, flags); @@ -5414,18 +5438,22 @@ static struct task_struct fake_task = { }; /* - * Migrate all tasks from the rq, sleeping tasks will be migrated by - * try_to_wake_up()->select_task_rq(). + * Migrate all tasks (not pinned if pinned argument say so) from the rq, + * sleeping tasks will be migrated by try_to_wake_up()->select_task_rq(). * * Called with rq->lock held even though we'er in stop_machine() and * there's no concurrency possible, we hold the required locks anyway * because of lock validation efforts. */ -static void migrate_tasks(struct rq *dead_rq) +static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks) { struct rq *rq = dead_rq; struct task_struct *next, *stop = rq->stop; int dest_cpu; + unsigned int num_pinned_kthreads = 1; /* this thread */ + cpumask_t avail_cpus; + + cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask); /* * Fudge the rq selection such that the below task selection loop @@ -5447,10 +5475,12 @@ static void migrate_tasks(struct rq *dead_rq) for (;;) { /* - * There's this thread running, bail when that's the only - * remaining thread. + * There's this thread running + pinned threads, bail when + * that's the only remaining threads. 
*/ - if (rq->nr_running == 1) + if ((migrate_pinned_tasks && rq->nr_running == 1) || + (!migrate_pinned_tasks && + rq->nr_running == num_pinned_kthreads)) break; /* @@ -5461,6 +5491,13 @@ static void migrate_tasks(struct rq *dead_rq) BUG_ON(!next); next->sched_class->put_prev_task(rq, next); + if (!migrate_pinned_tasks && next->flags & PF_KTHREAD && + !cpumask_intersects(&avail_cpus, &next->cpus_allowed)) { + lockdep_unpin_lock(&rq->lock); + num_pinned_kthreads += 1; + continue; + } + /* * Rules for changing task_struct::cpus_allowed are holding * both pi_lock and rq->lock, such that holding either @@ -5486,7 +5523,7 @@ static void migrate_tasks(struct rq *dead_rq) } /* Find suitable destination for @next, with force if needed. */ - dest_cpu = select_fallback_rq(dead_rq->cpu, next); + dest_cpu = select_fallback_rq(dead_rq->cpu, next, false); rq = __migrate_task(rq, next, dest_cpu); if (rq != dead_rq) { @@ -5502,6 +5539,222 @@ static void migrate_tasks(struct rq *dead_rq) rq->stop = stop; } + +static void set_rq_online(struct rq *rq); +static void set_rq_offline(struct rq *rq); + +int do_isolation_work_cpu_stop(void *data) +{ + unsigned long flags; + unsigned int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); + + watchdog_disable(cpu); + + irq_migrate_all_off_this_cpu(); + + sched_ttwu_pending(); + /* Update our root-domain */ + raw_spin_lock_irqsave(&rq->lock, flags); + + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + + migrate_tasks(rq, false); + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* + * We might have been in tickless state. Clear NOHZ flags to avoid + * us being kicked for helping out with balancing + */ + nohz_balance_clear_nohz_mask(cpu); + return 0; +} + +int do_unisolation_work_cpu_stop(void *data) +{ + watchdog_enable(smp_processor_id()); + return 0; +} + +static void init_sched_groups_capacity(int cpu, struct sched_domain *sd); + +static void sched_update_group_capacities(int cpu) +{ + struct sched_domain *sd; + + mutex_lock(&sched_domains_mutex); + rcu_read_lock(); + + for_each_domain(cpu, sd) { + int balance_cpu = group_balance_cpu(sd->groups); + + init_sched_groups_capacity(cpu, sd); + /* + * Need to ensure this is also called with balancing + * cpu. + */ + if (cpu != balance_cpu) + init_sched_groups_capacity(balance_cpu, sd); + } + + rcu_read_unlock(); + mutex_unlock(&sched_domains_mutex); +} + +static unsigned int cpu_isolation_vote[NR_CPUS]; + +int sched_isolate_count(const cpumask_t *mask, bool include_offline) +{ + cpumask_t count_mask = CPU_MASK_NONE; + + if (include_offline) { + cpumask_complement(&count_mask, cpu_online_mask); + cpumask_or(&count_mask, &count_mask, cpu_isolated_mask); + cpumask_and(&count_mask, &count_mask, mask); + } else { + cpumask_and(&count_mask, mask, cpu_isolated_mask); + } + + return cpumask_weight(&count_mask); +} + +/* + * 1) CPU is isolated and cpu is offlined: + * Unisolate the core. + * 2) CPU is not isolated and CPU is offlined: + * No action taken. + * 3) CPU is offline and request to isolate + * Request ignored. + * 4) CPU is offline and isolated: + * Not a possible state. + * 5) CPU is online and request to isolate + * Normal case: Isolate the CPU + * 6) CPU is not isolated and comes back online + * Nothing to do + * + * Note: The client calling sched_isolate_cpu() is repsonsible for ONLY + * calling sched_unisolate_cpu() on a CPU that the client previously isolated. 
+ * Client is also responsible for unisolating when a core goes offline + * (after CPU is marked offline). + */ +int sched_isolate_cpu(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + cpumask_t avail_cpus; + int ret_code = 0; + u64 start_time; + + if (trace_sched_isolate_enabled()) + start_time = sched_clock(); + + lock_device_hotplug(); + + cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask); + + /* We cannot isolate ALL cpus in the system */ + if (cpumask_weight(&avail_cpus) == 1) { + ret_code = -EINVAL; + goto out; + } + + if (!cpu_online(cpu)) { + ret_code = -EINVAL; + goto out; + } + + if (++cpu_isolation_vote[cpu] > 1) + goto out; + + set_cpu_isolated(cpu, true); + cpumask_clear_cpu(cpu, &avail_cpus); + + /* Migrate timers */ + smp_call_function_any(&avail_cpus, hrtimer_quiesce_cpu, &cpu, 1); + smp_call_function_any(&avail_cpus, timer_quiesce_cpu, &cpu, 1); + + migrate_sync_cpu(cpu, cpumask_first(&avail_cpus)); + stop_cpus(cpumask_of(cpu), do_isolation_work_cpu_stop, 0); + + clear_hmp_request(cpu); + calc_load_migrate(rq); + update_max_interval(); + sched_update_group_capacities(cpu); + +out: + unlock_device_hotplug(); + trace_sched_isolate(cpu, cpumask_bits(cpu_isolated_mask)[0], + start_time, 1); + return ret_code; +} + +/* + * Note: The client calling sched_isolate_cpu() is repsonsible for ONLY + * calling sched_unisolate_cpu() on a CPU that the client previously isolated. + * Client is also responsible for unisolating when a core goes offline + * (after CPU is marked offline). + */ +int sched_unisolate_cpu_unlocked(int cpu) +{ + int ret_code = 0; + struct rq *rq = cpu_rq(cpu); + u64 start_time; + + if (trace_sched_isolate_enabled()) + start_time = sched_clock(); + + lock_device_hotplug_assert(); + + if (!cpu_isolation_vote[cpu]) { + ret_code = -EINVAL; + goto out; + } + + if (--cpu_isolation_vote[cpu]) + goto out; + + if (cpu_online(cpu)) { + unsigned long flags; + + raw_spin_lock_irqsave(&rq->lock, flags); + rq->age_stamp = sched_clock_cpu(cpu); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_online(rq); + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + + set_cpu_isolated(cpu, false); + update_max_interval(); + sched_update_group_capacities(cpu); + + if (cpu_online(cpu)) { + stop_cpus(cpumask_of(cpu), do_unisolation_work_cpu_stop, 0); + + /* Kick CPU to immediately do load balancing */ + if (!test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu))) + smp_send_reschedule(cpu); + } + +out: + trace_sched_isolate(cpu, cpumask_bits(cpu_isolated_mask)[0], + start_time, 0); + return ret_code; +} + +int sched_unisolate_cpu(int cpu) +{ + int ret_code; + + lock_device_hotplug(); + ret_code = sched_unisolate_cpu_unlocked(cpu); + unlock_device_hotplug(); + return ret_code; +} + #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -5748,13 +6001,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) sched_ttwu_pending(); /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); - migrate_sync_cpu(cpu); + migrate_sync_cpu(cpu, smp_processor_id()); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } - migrate_tasks(rq); + migrate_tasks(rq, true); BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); break; @@ -6509,11 +6762,14 @@ build_sched_groups(struct sched_domain *sd, int cpu) static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) { struct sched_group *sg = sd->groups; + 
cpumask_t avail_mask; WARN_ON(!sg); do { - sg->group_weight = cpumask_weight(sched_group_cpus(sg)); + cpumask_andnot(&avail_mask, sched_group_cpus(sg), + cpu_isolated_mask); + sg->group_weight = cpumask_weight(&avail_mask); sg = sg->next; } while (sg != sd->groups); diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c new file mode 100644 index 000000000000..8f071757d516 --- /dev/null +++ b/kernel/sched/core_ctl.c @@ -0,0 +1,1014 @@ +/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/timer.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> + +#include <trace/events/power.h> + +#define MAX_CPUS_PER_GROUP 4 + +struct cpu_data { + /* Per CPU data. */ + bool inited; + bool online; + bool rejected; + bool is_busy; + bool not_preferred; + unsigned int busy; + unsigned int cpu; + struct list_head sib; + unsigned int first_cpu; + + /* Per cluster data set only on first CPU */ + unsigned int min_cpus; + unsigned int max_cpus; + unsigned int offline_delay_ms; + unsigned int busy_up_thres[MAX_CPUS_PER_GROUP]; + unsigned int busy_down_thres[MAX_CPUS_PER_GROUP]; + unsigned int online_cpus; + unsigned int avail_cpus; + unsigned int num_cpus; + unsigned int need_cpus; + unsigned int task_thres; + s64 need_ts; + struct list_head lru; + bool pending; + spinlock_t pending_lock; + bool is_big_cluster; + int nrrun; + bool nrrun_changed; + struct timer_list timer; + struct task_struct *hotplug_thread; + struct kobject kobj; +}; + +static DEFINE_PER_CPU(struct cpu_data, cpu_state); +static DEFINE_SPINLOCK(state_lock); +static void apply_need(struct cpu_data *f); +static void wake_up_hotplug_thread(struct cpu_data *state); + +/* ========================= sysfs interface =========================== */ + +static ssize_t store_min_cpus(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + state->min_cpus = min(val, state->max_cpus); + wake_up_hotplug_thread(state); + + return count; +} + +static ssize_t show_min_cpus(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->min_cpus); +} + +static ssize_t store_max_cpus(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + val = min(val, state->num_cpus); + state->max_cpus = val; + state->min_cpus = min(state->min_cpus, state->max_cpus); + wake_up_hotplug_thread(state); + + return count; +} + +static ssize_t show_max_cpus(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->max_cpus); +} + +static ssize_t store_offline_delay_ms(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + state->offline_delay_ms = val; + apply_need(state); + + return count; +} + +static 
ssize_t show_task_thres(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->task_thres); +} + +static ssize_t store_task_thres(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + if (val < state->num_cpus) + return -EINVAL; + + state->task_thres = val; + apply_need(state); + + return count; +} + +static ssize_t show_offline_delay_ms(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->offline_delay_ms); +} + +static ssize_t store_busy_up_thres(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val[MAX_CPUS_PER_GROUP]; + int ret, i; + + ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]); + if (ret != 1 && ret != state->num_cpus) + return -EINVAL; + + if (ret == 1) { + for (i = 0; i < state->num_cpus; i++) + state->busy_up_thres[i] = val[0]; + } else { + for (i = 0; i < state->num_cpus; i++) + state->busy_up_thres[i] = val[i]; + } + apply_need(state); + return count; +} + +static ssize_t show_busy_up_thres(struct cpu_data *state, char *buf) +{ + int i, count = 0; + + for (i = 0; i < state->num_cpus; i++) + count += snprintf(buf + count, PAGE_SIZE - count, "%u ", + state->busy_up_thres[i]); + count += snprintf(buf + count, PAGE_SIZE - count, "\n"); + return count; +} + +static ssize_t store_busy_down_thres(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val[MAX_CPUS_PER_GROUP]; + int ret, i; + + ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]); + if (ret != 1 && ret != state->num_cpus) + return -EINVAL; + + if (ret == 1) { + for (i = 0; i < state->num_cpus; i++) + state->busy_down_thres[i] = val[0]; + } else { + for (i = 0; i < state->num_cpus; i++) + state->busy_down_thres[i] = val[i]; + } + apply_need(state); + return count; +} + +static ssize_t show_busy_down_thres(struct cpu_data *state, char *buf) +{ + int i, count = 0; + + for (i = 0; i < state->num_cpus; i++) + count += snprintf(buf + count, PAGE_SIZE - count, "%u ", + state->busy_down_thres[i]); + count += snprintf(buf + count, PAGE_SIZE - count, "\n"); + return count; +} + +static ssize_t store_is_big_cluster(struct cpu_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + state->is_big_cluster = val ? 1 : 0; + return count; +} + +static ssize_t show_is_big_cluster(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster); +} + +static ssize_t show_cpus(struct cpu_data *state, char *buf) +{ + struct cpu_data *c; + ssize_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry(c, &state->lru, sib) { + count += snprintf(buf + count, PAGE_SIZE - count, + "CPU%u (%s)\n", c->cpu, + c->online ? 
"Online" : "Offline"); + } + spin_unlock_irqrestore(&state_lock, flags); + return count; +} + +static ssize_t show_need_cpus(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus); +} + +static ssize_t show_online_cpus(struct cpu_data *state, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", state->online_cpus); +} + +static ssize_t show_global_state(struct cpu_data *state, char *buf) +{ + struct cpu_data *c; + ssize_t count = 0; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + count += snprintf(buf + count, PAGE_SIZE - count, + "CPU%u\n", cpu); + c = &per_cpu(cpu_state, cpu); + if (!c->inited) + continue; + count += snprintf(buf + count, PAGE_SIZE - count, + "\tCPU: %u\n", c->cpu); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tOnline: %u\n", c->online); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tRejected: %u\n", c->rejected); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tFirst CPU: %u\n", c->first_cpu); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tBusy%%: %u\n", c->busy); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tIs busy: %u\n", c->is_busy); + if (c->cpu != c->first_cpu) + continue; + count += snprintf(buf + count, PAGE_SIZE - count, + "\tNr running: %u\n", c->nrrun); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tAvail CPUs: %u\n", c->avail_cpus); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tNeed CPUs: %u\n", c->need_cpus); + } + + return count; +} + +static ssize_t store_not_preferred(struct cpu_data *state, + const char *buf, size_t count) +{ + struct cpu_data *c; + unsigned int i, first_cpu; + unsigned int val[MAX_CPUS_PER_GROUP]; + int ret; + + ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]); + if (ret != 1 && ret != state->num_cpus) + return -EINVAL; + + first_cpu = state->first_cpu; + + for (i = 0; i < state->num_cpus; i++) { + c = &per_cpu(cpu_state, first_cpu); + c->not_preferred = val[i]; + first_cpu++; + } + + return count; +} + +static ssize_t show_not_preferred(struct cpu_data *state, char *buf) +{ + struct cpu_data *c; + ssize_t count = 0; + unsigned int i, first_cpu; + + first_cpu = state->first_cpu; + + for (i = 0; i < state->num_cpus; i++) { + c = &per_cpu(cpu_state, first_cpu); + count += snprintf(buf + count, PAGE_SIZE - count, + "\tCPU:%d %u\n", first_cpu, c->not_preferred); + first_cpu++; + } + + return count; +} + +struct core_ctl_attr { + struct attribute attr; + ssize_t (*show)(struct cpu_data *, char *); + ssize_t (*store)(struct cpu_data *, const char *, size_t count); +}; + +#define core_ctl_attr_ro(_name) \ +static struct core_ctl_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define core_ctl_attr_rw(_name) \ +static struct core_ctl_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +core_ctl_attr_rw(min_cpus); +core_ctl_attr_rw(max_cpus); +core_ctl_attr_rw(offline_delay_ms); +core_ctl_attr_rw(busy_up_thres); +core_ctl_attr_rw(busy_down_thres); +core_ctl_attr_rw(task_thres); +core_ctl_attr_rw(is_big_cluster); +core_ctl_attr_ro(cpus); +core_ctl_attr_ro(need_cpus); +core_ctl_attr_ro(online_cpus); +core_ctl_attr_ro(global_state); +core_ctl_attr_rw(not_preferred); + +static struct attribute *default_attrs[] = { + &min_cpus.attr, + &max_cpus.attr, + &offline_delay_ms.attr, + &busy_up_thres.attr, + &busy_down_thres.attr, + &task_thres.attr, + &is_big_cluster.attr, + &cpus.attr, + &need_cpus.attr, + &online_cpus.attr, + &global_state.attr, + ¬_preferred.attr, + NULL +}; + 
+#define to_cpu_data(k) container_of(k, struct cpu_data, kobj) +#define to_attr(a) container_of(a, struct core_ctl_attr, attr) +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct cpu_data *data = to_cpu_data(kobj); + struct core_ctl_attr *cattr = to_attr(attr); + ssize_t ret = -EIO; + + if (cattr->show) + ret = cattr->show(data, buf); + + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct cpu_data *data = to_cpu_data(kobj); + struct core_ctl_attr *cattr = to_attr(attr); + ssize_t ret = -EIO; + + if (cattr->store) + ret = cattr->store(data, buf, count); + + return ret; +} + +static const struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_core_ctl = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, +}; + +/* ==================== runqueue based core count =================== */ + +#define RQ_AVG_TOLERANCE 2 +#define RQ_AVG_DEFAULT_MS 20 +#define NR_RUNNING_TOLERANCE 5 +static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS; + +static s64 rq_avg_timestamp_ms; +static struct timer_list rq_avg_timer; + +static void update_running_avg(bool trigger_update) +{ + int cpu; + struct cpu_data *pcpu; + int avg, iowait_avg, big_avg, old_nrrun; + s64 now; + unsigned long flags; + + spin_lock_irqsave(&state_lock, flags); + + now = ktime_to_ms(ktime_get()); + if (now - rq_avg_timestamp_ms < rq_avg_period_ms - RQ_AVG_TOLERANCE) { + spin_unlock_irqrestore(&state_lock, flags); + return; + } + rq_avg_timestamp_ms = now; + sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg); + + spin_unlock_irqrestore(&state_lock, flags); + + /* + * Round up to the next integer if the average nr running tasks + * is within NR_RUNNING_TOLERANCE/100 of the next integer. + * If normal rounding up is used, it will allow a transient task + * to trigger online event. By the time core is onlined, the task + * has finished. + * Rounding to closest suffers same problem because scheduler + * might only provide running stats per jiffy, and a transient + * task could skew the number for one jiffy. If core control + * samples every 2 jiffies, it will observe 0.5 additional running + * average which rounds up to 1 task. + */ + avg = (avg + NR_RUNNING_TOLERANCE) / 100; + big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100; + + for_each_possible_cpu(cpu) { + pcpu = &per_cpu(cpu_state, cpu); + if (!pcpu->inited || pcpu->first_cpu != cpu) + continue; + old_nrrun = pcpu->nrrun; + /* + * Big cluster only need to take care of big tasks, but if + * there are not enough big cores, big tasks need to be run + * on little as well. Thus for little's runqueue stat, it + * has to use overall runqueue average, or derive what big + * tasks would have to be run on little. The latter approach + * is not easy to get given core control reacts much slower + * than scheduler, and can't predict scheduler's behavior. + */ + pcpu->nrrun = pcpu->is_big_cluster ? 
big_avg : avg; + if (pcpu->nrrun != old_nrrun) { + if (trigger_update) + apply_need(pcpu); + else + pcpu->nrrun_changed = true; + } + } +} + +/* adjust needed CPUs based on current runqueue information */ +static unsigned int apply_task_need(struct cpu_data *f, unsigned int new_need) +{ + /* Online all cores if there are enough tasks */ + if (f->nrrun >= f->task_thres) + return f->num_cpus; + + /* only online more cores if there are tasks to run */ + if (f->nrrun > new_need) + return new_need + 1; + + return new_need; +} + +static u64 round_to_nw_start(void) +{ + unsigned long step = msecs_to_jiffies(rq_avg_period_ms); + u64 jif = get_jiffies_64(); + + do_div(jif, step); + return (jif + 1) * step; +} + +static void rq_avg_timer_func(unsigned long not_used) +{ + update_running_avg(true); + mod_timer(&rq_avg_timer, round_to_nw_start()); +} + +/* ======================= load based core count ====================== */ + +static unsigned int apply_limits(struct cpu_data *f, unsigned int need_cpus) +{ + return min(max(f->min_cpus, need_cpus), f->max_cpus); +} + +static bool eval_need(struct cpu_data *f) +{ + unsigned long flags; + struct cpu_data *c; + unsigned int need_cpus = 0, last_need, thres_idx; + int ret = 0; + bool need_flag = false; + s64 now; + + if (unlikely(!f->inited)) + return 0; + + spin_lock_irqsave(&state_lock, flags); + thres_idx = f->online_cpus ? f->online_cpus - 1 : 0; + list_for_each_entry(c, &f->lru, sib) { + if (c->busy >= f->busy_up_thres[thres_idx]) + c->is_busy = true; + else if (c->busy < f->busy_down_thres[thres_idx]) + c->is_busy = false; + need_cpus += c->is_busy; + } + need_cpus = apply_task_need(f, need_cpus); + need_flag = apply_limits(f, need_cpus) != apply_limits(f, f->need_cpus); + last_need = f->need_cpus; + + now = ktime_to_ms(ktime_get()); + + if (need_cpus == last_need) { + f->need_ts = now; + spin_unlock_irqrestore(&state_lock, flags); + return 0; + } + + if (need_cpus > last_need) { + ret = 1; + } else if (need_cpus < last_need) { + s64 elapsed = now - f->need_ts; + + if (elapsed >= f->offline_delay_ms) { + ret = 1; + } else { + mod_timer(&f->timer, jiffies + + msecs_to_jiffies(f->offline_delay_ms)); + } + } + + if (ret) { + f->need_ts = now; + f->need_cpus = need_cpus; + } + + trace_core_ctl_eval_need(f->cpu, last_need, need_cpus, + ret && need_flag); + spin_unlock_irqrestore(&state_lock, flags); + + return ret && need_flag; +} + +static void apply_need(struct cpu_data *f) +{ + if (eval_need(f)) + wake_up_hotplug_thread(f); +} + +static int core_ctl_set_busy(unsigned int cpu, unsigned int busy) +{ + struct cpu_data *c = &per_cpu(cpu_state, cpu); + struct cpu_data *f; + unsigned int old_is_busy = c->is_busy; + + if (!c->inited) + return 0; + f = &per_cpu(cpu_state, c->first_cpu); + + update_running_avg(false); + if (c->busy == busy && !f->nrrun_changed) + return 0; + c->busy = busy; + f->nrrun_changed = false; + + apply_need(f); + trace_core_ctl_set_busy(cpu, busy, old_is_busy, c->is_busy); + return 0; +} + +/* ========================= core count enforcement ==================== */ + +/* + * If current thread is hotplug thread, don't attempt to wake up + * itself or other hotplug threads because it will deadlock. Instead, + * schedule a timer to fire in next timer tick and wake up the thread. 
+ */ +static void wake_up_hotplug_thread(struct cpu_data *state) +{ + unsigned long flags; + int cpu; + struct cpu_data *pcpu; + bool no_wakeup = false; + + for_each_possible_cpu(cpu) { + pcpu = &per_cpu(cpu_state, cpu); + if (cpu != pcpu->first_cpu) + continue; + if (pcpu->hotplug_thread == current) { + no_wakeup = true; + break; + } + } + + spin_lock_irqsave(&state->pending_lock, flags); + state->pending = true; + spin_unlock_irqrestore(&state->pending_lock, flags); + + if (no_wakeup) { + spin_lock_irqsave(&state_lock, flags); + mod_timer(&state->timer, jiffies); + spin_unlock_irqrestore(&state_lock, flags); + } else { + wake_up_process(state->hotplug_thread); + } +} + +static void core_ctl_timer_func(unsigned long cpu) +{ + struct cpu_data *state = &per_cpu(cpu_state, cpu); + unsigned long flags; + + if (eval_need(state)) { + spin_lock_irqsave(&state->pending_lock, flags); + state->pending = true; + spin_unlock_irqrestore(&state->pending_lock, flags); + wake_up_process(state->hotplug_thread); + } + +} + +static int core_ctl_online_core(unsigned int cpu) +{ + int ret; + struct device *dev; + + lock_device_hotplug(); + dev = get_cpu_device(cpu); + if (!dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, cpu); + ret = -ENODEV; + } else { + ret = device_online(dev); + } + unlock_device_hotplug(); + return ret; +} + +static int core_ctl_offline_core(unsigned int cpu) +{ + int ret; + struct device *dev; + + lock_device_hotplug(); + dev = get_cpu_device(cpu); + if (!dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, cpu); + ret = -ENODEV; + } else { + ret = device_offline(dev); + } + unlock_device_hotplug(); + return ret; +} + +static void __ref do_hotplug(struct cpu_data *f) +{ + unsigned int need; + struct cpu_data *c, *tmp; + + need = apply_limits(f, f->need_cpus); + pr_debug("Trying to adjust group %u to %u\n", f->first_cpu, need); + + if (f->online_cpus > need) { + list_for_each_entry_safe(c, tmp, &f->lru, sib) { + if (!c->online) + continue; + + if (f->online_cpus == need) + break; + + /* Don't offline busy CPUs. */ + if (c->is_busy) + continue; + + pr_debug("Trying to Offline CPU%u\n", c->cpu); + if (core_ctl_offline_core(c->cpu)) + pr_debug("Unable to Offline CPU%u\n", c->cpu); + } + + /* + * If the number of online CPUs is within the limits, then + * don't force any busy CPUs offline. 
+ */ + if (f->online_cpus <= f->max_cpus) + return; + + list_for_each_entry_safe(c, tmp, &f->lru, sib) { + if (!c->online) + continue; + + if (f->online_cpus <= f->max_cpus) + break; + + pr_debug("Trying to Offline CPU%u\n", c->cpu); + if (core_ctl_offline_core(c->cpu)) + pr_debug("Unable to Offline CPU%u\n", c->cpu); + } + } else if (f->online_cpus < need) { + list_for_each_entry_safe(c, tmp, &f->lru, sib) { + if (c->online || c->rejected || c->not_preferred) + continue; + if (f->online_cpus == need) + break; + + pr_debug("Trying to Online CPU%u\n", c->cpu); + if (core_ctl_online_core(c->cpu)) + pr_debug("Unable to Online CPU%u\n", c->cpu); + } + + if (f->online_cpus == need) + return; + + + list_for_each_entry_safe(c, tmp, &f->lru, sib) { + if (c->online || c->rejected || !c->not_preferred) + continue; + if (f->online_cpus == need) + break; + + pr_debug("Trying to Online CPU%u\n", c->cpu); + if (core_ctl_online_core(c->cpu)) + pr_debug("Unable to Online CPU%u\n", c->cpu); + } + + } +} + +static int __ref try_hotplug(void *data) +{ + struct cpu_data *f = data; + unsigned long flags; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&f->pending_lock, flags); + if (!f->pending) { + spin_unlock_irqrestore(&f->pending_lock, flags); + schedule(); + if (kthread_should_stop()) + break; + spin_lock_irqsave(&f->pending_lock, flags); + } + set_current_state(TASK_RUNNING); + f->pending = false; + spin_unlock_irqrestore(&f->pending_lock, flags); + + do_hotplug(f); + } + + return 0; +} + +static int __ref cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + uint32_t cpu = (uintptr_t)hcpu; + struct cpu_data *state = &per_cpu(cpu_state, cpu); + struct cpu_data *f; + int ret = NOTIFY_OK; + unsigned long flags; + + /* Don't affect suspend resume */ + if (action & CPU_TASKS_FROZEN) + return NOTIFY_OK; + + if (unlikely(!state->inited)) + return NOTIFY_OK; + + f = &per_cpu(cpu_state, state->first_cpu); + + switch (action) { + case CPU_UP_PREPARE: + + /* If online state of CPU somehow got out of sync, fix it. */ + if (state->online) { + f->online_cpus--; + state->online = false; + pr_warn("CPU%d offline when state is online\n", cpu); + } + + if (state->rejected) { + state->rejected = false; + f->avail_cpus++; + } + + /* + * If a CPU is in the process of coming up, mark it as online + * so that there's no race with hotplug thread bringing up more + * CPUs than necessary. + */ + if (apply_limits(f, f->need_cpus) <= f->online_cpus) { + pr_debug("Prevent CPU%d onlining\n", cpu); + ret = NOTIFY_BAD; + } else { + state->online = true; + f->online_cpus++; + } + break; + + case CPU_ONLINE: + /* + * Moving to the end of the list should only happen in + * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an + * infinite list traversal when thermal (or other entities) + * reject trying to online CPUs. + */ + spin_lock_irqsave(&state_lock, flags); + list_del(&state->sib); + list_add_tail(&state->sib, &f->lru); + spin_unlock_irqrestore(&state_lock, flags); + break; + + case CPU_DEAD: + /* Move a CPU to the end of the LRU when it goes offline. */ + spin_lock_irqsave(&state_lock, flags); + list_del(&state->sib); + list_add_tail(&state->sib, &f->lru); + spin_unlock_irqrestore(&state_lock, flags); + + /* Fall through */ + + case CPU_UP_CANCELED: + + /* If online state of CPU somehow got out of sync, fix it. 
*/ + if (!state->online) { + f->online_cpus++; + pr_warn("CPU%d online when state is offline\n", cpu); + } + + if (!state->rejected && action == CPU_UP_CANCELED) { + state->rejected = true; + f->avail_cpus--; + } + + state->online = false; + state->busy = 0; + f->online_cpus--; + break; + } + + if (f->online_cpus < apply_limits(f, f->need_cpus) + && f->online_cpus < f->avail_cpus + && action == CPU_DEAD) + wake_up_hotplug_thread(f); + + return ret; +} + +static struct notifier_block __refdata cpu_notifier = { + .notifier_call = cpu_callback, +}; + +/* ============================ init code ============================== */ + +static int group_init(struct cpumask *mask) +{ + struct device *dev; + unsigned int first_cpu = cpumask_first(mask); + struct cpu_data *f = &per_cpu(cpu_state, first_cpu); + struct cpu_data *state; + unsigned int cpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + if (likely(f->inited)) + return 0; + + dev = get_cpu_device(first_cpu); + if (!dev) + return -ENODEV; + + pr_info("Creating CPU group %d\n", first_cpu); + + f->num_cpus = cpumask_weight(mask); + if (f->num_cpus > MAX_CPUS_PER_GROUP) { + pr_err("HW configuration not supported\n"); + return -EINVAL; + } + f->min_cpus = 1; + f->max_cpus = f->num_cpus; + f->need_cpus = f->num_cpus; + f->avail_cpus = f->num_cpus; + f->offline_delay_ms = 100; + f->task_thres = UINT_MAX; + f->nrrun = f->num_cpus; + INIT_LIST_HEAD(&f->lru); + init_timer(&f->timer); + spin_lock_init(&f->pending_lock); + f->timer.function = core_ctl_timer_func; + f->timer.data = first_cpu; + + for_each_cpu(cpu, mask) { + pr_info("Init CPU%u state\n", cpu); + + state = &per_cpu(cpu_state, cpu); + state->cpu = cpu; + state->first_cpu = first_cpu; + + if (cpu_online(cpu)) { + f->online_cpus++; + state->online = true; + } + + list_add_tail(&state->sib, &f->lru); + } + + f->hotplug_thread = kthread_run(try_hotplug, (void *) f, + "core_ctl/%d", first_cpu); + sched_setscheduler_nocheck(f->hotplug_thread, SCHED_FIFO, ¶m); + + for_each_cpu(cpu, mask) { + state = &per_cpu(cpu_state, cpu); + state->inited = true; + } + + kobject_init(&f->kobj, &ktype_core_ctl); + return kobject_add(&f->kobj, &dev->kobj, "core_ctl"); +} + +static int cpufreq_policy_cb(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_policy *policy = data; + + switch (val) { + case CPUFREQ_CREATE_POLICY: + group_init(policy->related_cpus); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_pol_nb = { + .notifier_call = cpufreq_policy_cb, +}; + +static int cpufreq_gov_cb(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_govinfo *info = data; + + switch (val) { + case CPUFREQ_LOAD_CHANGE: + core_ctl_set_busy(info->cpu, info->load); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_gov_nb = { + .notifier_call = cpufreq_gov_cb, +}; + +static int __init core_ctl_init(void) +{ + struct cpufreq_policy *policy; + unsigned int cpu; + + register_cpu_notifier(&cpu_notifier); + cpufreq_register_notifier(&cpufreq_pol_nb, CPUFREQ_POLICY_NOTIFIER); + cpufreq_register_notifier(&cpufreq_gov_nb, CPUFREQ_GOVINFO_NOTIFIER); + init_timer_deferrable(&rq_avg_timer); + rq_avg_timer.function = rq_avg_timer_func; + + get_online_cpus(); + for_each_online_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + if (policy) { + group_init(policy->related_cpus); + cpufreq_cpu_put(policy); + } + } + put_online_cpus(); + mod_timer(&rq_avg_timer, round_to_nw_start()); + return 0; +} + 
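The group sizing policy added above is easier to follow outside the patch context, so here is a minimal userspace model of the eval_need()/apply_need() decision: per-CPU busy hysteresis via an up/down threshold pair, immediate up-sizing, and down-sizing deferred by offline_delay_ms. Everything in this sketch (struct grp, model_eval_need(), the single threshold pair) is invented for illustration, and it deliberately omits the runqueue-average and task_thres inputs; only the decision flow mirrors the kernel code.

/*
 * Illustrative model, not part of the patch: per-CPU busy hysteresis plus a
 * deferred down-sizing window. Names are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct grp {
        unsigned int num_cpus, min_cpus, max_cpus;
        unsigned int busy_up_thres, busy_down_thres;  /* one pair for brevity */
        unsigned int need_cpus;
        long long need_ts, offline_delay_ms;
        bool is_busy[8];
};

static unsigned int clamp_need(struct grp *g, unsigned int need)
{
        need = need < g->min_cpus ? g->min_cpus : need;
        return need > g->max_cpus ? g->max_cpus : need;
}

/* Returns the enforced core count for this sample taken at time 'now_ms'. */
static unsigned int model_eval_need(struct grp *g, const unsigned int busy[],
                                    long long now_ms)
{
        unsigned int cpu, need = 0;

        for (cpu = 0; cpu < g->num_cpus; cpu++) {
                if (busy[cpu] >= g->busy_up_thres)
                        g->is_busy[cpu] = true;
                else if (busy[cpu] < g->busy_down_thres)
                        g->is_busy[cpu] = false;
                need += g->is_busy[cpu];
        }

        if (need >= g->need_cpus) {
                /* Scaling up (or holding steady) takes effect immediately. */
                g->need_cpus = need;
                g->need_ts = now_ms;
        } else if (now_ms - g->need_ts >= g->offline_delay_ms) {
                /* Scaling down only once the delay window has elapsed. */
                g->need_cpus = need;
                g->need_ts = now_ms;
        }
        return clamp_need(g, g->need_cpus);
}

int main(void)
{
        struct grp g = { .num_cpus = 4, .min_cpus = 1, .max_cpus = 4,
                         .busy_up_thres = 60, .busy_down_thres = 30,
                         .need_cpus = 4, .offline_delay_ms = 100 };
        unsigned int light[4] = { 10, 10, 10, 10 };

        printf("t=0ms   need=%u\n", model_eval_need(&g, light, 0));   /* still 4 */
        printf("t=150ms need=%u\n", model_eval_need(&g, light, 150)); /* drops to 1 */
        return 0;
}

The asymmetry modeled here (instant up-sizing, delayed down-sizing) is what keeps short busy bursts from bouncing cores on and off; the kernel code additionally arms a timer so the deferred shrink is re-evaluated even without a new load sample.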
+late_initcall(core_ctl_init); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e893b0fcac6b..83da13b5f6b8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2941,6 +2941,8 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c, struct cpumask search_cpus; cpumask_and(&search_cpus, tsk_cpus_allowed(env->p), &c->cpus); + cpumask_andnot(&search_cpus, &search_cpus, cpu_isolated_mask); + if (env->ignore_prev_cpu) cpumask_clear_cpu(env->prev_cpu, &search_cpus); @@ -3009,7 +3011,8 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats) prev_cpu = env->prev_cpu; if (!cpumask_test_cpu(prev_cpu, tsk_cpus_allowed(task)) || - unlikely(!cpu_active(prev_cpu))) + unlikely(!cpu_active(prev_cpu)) || + cpu_isolated(prev_cpu)) return false; if (task->ravg.mark_start - task->last_cpu_selected_ts >= @@ -7354,6 +7357,8 @@ void update_group_capacity(struct sched_domain *sd, int cpu) struct sched_group_capacity *sgc; struct rq *rq = cpu_rq(cpu); + if (cpumask_test_cpu(cpu, cpu_isolated_mask)) + continue; /* * build_sched_domains() -> init_sched_groups_capacity() * gets here before we've attached the domains to the @@ -7381,7 +7386,11 @@ void update_group_capacity(struct sched_domain *sd, int cpu) group = child->groups; do { - capacity += group->sgc->capacity; + cpumask_t *cpus = sched_group_cpus(group); + + /* Revisit this later. This won't work for MT domain */ + if (!cpu_isolated(cpumask_first(cpus))) + capacity += group->sgc->capacity; group = group->next; } while (group != child->groups); } @@ -7521,6 +7530,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, power_cost(i, 0), cpu_temp(i)); + if (cpu_isolated(i)) + continue; + /* Bias balancing toward cpus of our domain */ if (local_group) load = target_load(i, load_idx); @@ -7548,17 +7560,27 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->idle_cpus++; } - /* Adjust by relative CPU capacity of the group */ - sgs->group_capacity = group->sgc->capacity; - sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; + /* Isolated CPU has no weight */ + if (!group->group_weight) { + sgs->group_capacity = 0; + sgs->avg_load = 0; + sgs->group_no_capacity = 1; + sgs->group_type = group_other; + sgs->group_weight = group->group_weight; + } else { + /* Adjust by relative CPU capacity of the group */ + sgs->group_capacity = group->sgc->capacity; + sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / + sgs->group_capacity; - if (sgs->sum_nr_running) - sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; + sgs->group_weight = group->group_weight; - sgs->group_weight = group->group_weight; + sgs->group_no_capacity = group_is_overloaded(env, sgs); + sgs->group_type = group_classify(group, sgs, env); + } - sgs->group_no_capacity = group_is_overloaded(env, sgs); - sgs->group_type = group_classify(group, sgs, env); + if (sgs->sum_nr_running) + sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; } #ifdef CONFIG_SCHED_HMP @@ -8601,6 +8623,9 @@ static int idle_balance(struct rq *this_rq) int pulled_task = 0; u64 curr_cost = 0; + if (cpu_isolated(this_cpu)) + return 0; + idle_enter_fair(this_rq); /* @@ -8908,16 +8933,21 @@ static void nohz_balancer_kick(int type) return; } +void nohz_balance_clear_nohz_mask(int cpu) +{ + if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) { + cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + } +} + static inline void nohz_balance_exit_idle(int cpu) { if 
(unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { /* * Completely isolated CPUs don't ever set, so we must test. */ - if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) { - cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); - } + nohz_balance_clear_nohz_mask(cpu); clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); } } @@ -8974,7 +9004,7 @@ void nohz_balance_enter_idle(int cpu) /* * If we're a completely isolated CPU, we don't play. */ - if (on_null_domain(cpu_rq(cpu))) + if (on_null_domain(cpu_rq(cpu)) || cpu_isolated(cpu)) return; cpumask_set_cpu(cpu, nohz.idle_cpus_mask); @@ -9003,7 +9033,13 @@ static DEFINE_SPINLOCK(balancing); */ void update_max_interval(void) { - max_load_balance_interval = HZ*num_online_cpus()/10; + cpumask_t avail_mask; + unsigned int available_cpus; + + cpumask_andnot(&avail_mask, cpu_online_mask, cpu_isolated_mask); + available_cpus = cpumask_weight(&avail_mask); + + max_load_balance_interval = HZ*available_cpus/10; } /* @@ -9342,8 +9378,10 @@ void trigger_load_balance(struct rq *rq) { int type = NOHZ_KICK_ANY; - /* Don't need to rebalance while attached to NULL domain */ - if (unlikely(on_null_domain(rq))) + /* Don't need to rebalance while attached to NULL domain or + * cpu is isolated. + */ + if (unlikely(on_null_domain(rq)) || cpu_isolated(cpu_of(rq))) return; if (time_after_eq(jiffies, rq->next_balance)) diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c index 5002619961ce..a921498dbf09 100644 --- a/kernel/sched/hmp.c +++ b/kernel/sched/hmp.c @@ -2828,10 +2828,10 @@ void set_window_start(struct rq *rq) rq->curr->ravg.mark_start = rq->window_start; } -void migrate_sync_cpu(int cpu) +void migrate_sync_cpu(int cpu, int new_cpu) { if (cpu == sync_cpu) - sync_cpu = smp_processor_id(); + sync_cpu = new_cpu; } static void reset_all_task_stats(void) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index cfec881491ef..ba4403e910d8 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -265,8 +265,12 @@ static void pull_rt_task(struct rq *this_rq); static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) { - /* Try to pull RT tasks here if we lower this rq's prio */ - return rq->rt.highest_prio.curr > prev->prio; + /* + * Try to pull RT tasks here if we lower this rq's prio and cpu is not + * isolated + */ + return rq->rt.highest_prio.curr > prev->prio && + !cpu_isolated(cpu_of(rq)); } static inline int rt_overloaded(struct rq *rq) @@ -1694,6 +1698,8 @@ static int find_lowest_rq_hmp(struct task_struct *task) for_each_sched_cluster(cluster) { cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask); + cpumask_andnot(&candidate_mask, &candidate_mask, + cpu_isolated_mask); if (cpumask_empty(&candidate_mask)) continue; @@ -2282,7 +2288,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. 
*/ - if (!task_on_rq_queued(p) || rq->rt.rt_nr_running) + if (!task_on_rq_queued(p) || rq->rt.rt_nr_running || + cpu_isolated(cpu_of(rq))) return; queue_pull_task(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ec7721112b05..41abb4dabeb7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1069,7 +1069,7 @@ extern void clear_boost_kick(int cpu); extern void clear_hmp_request(int cpu); extern void mark_task_starting(struct task_struct *p); extern void set_window_start(struct rq *rq); -extern void migrate_sync_cpu(int cpu); +extern void migrate_sync_cpu(int cpu, int new_cpu); extern void update_cluster_topology(void); extern void set_task_last_wake(struct task_struct *p, u64 wallclock); extern void set_task_last_switch_out(struct task_struct *p, u64 wallclock); @@ -1424,7 +1424,7 @@ static inline void clear_boost_kick(int cpu) { } static inline void clear_hmp_request(int cpu) { } static inline void mark_task_starting(struct task_struct *p) { } static inline void set_window_start(struct rq *rq) { } -static inline void migrate_sync_cpu(int cpu) { } +static inline void migrate_sync_cpu(int cpu, int new_cpu) {} static inline void update_cluster_topology(void) { } static inline void set_task_last_wake(struct task_struct *p, u64 wallclock) { } static inline void set_task_last_switch_out(struct task_struct *p, @@ -1953,6 +1953,7 @@ extern const struct sched_class idle_sched_class; extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); +extern void nohz_balance_clear_nohz_mask(int cpu); extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); diff --git a/kernel/smp.c b/kernel/smp.c index abdc48cd79a3..b2ec21c5c9d6 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -766,8 +766,8 @@ void wake_up_all_idle_cpus(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - - wake_up_if_idle(cpu); + if (!cpu_isolated(cpu)) + wake_up_if_idle(cpu); } preempt_enable(); } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index fa909f9fd559..1b0117198a08 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -880,7 +880,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + timer->state |= HRTIMER_STATE_ENQUEUED; return timerqueue_add(&base->active, &timer->node); } @@ -900,11 +900,9 @@ static void __remove_hrtimer(struct hrtimer *timer, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; - u8 state = timer->state; - timer->state = newstate; - if (!(state & HRTIMER_STATE_ENQUEUED)) - return; + if (!(timer->state & HRTIMER_STATE_ENQUEUED)) + goto out; if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); @@ -921,6 +919,13 @@ static void __remove_hrtimer(struct hrtimer *timer, if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); #endif + +out: + /* + * We need to preserve PINNED state here, otherwise we may end up + * migrating pinned hrtimers as well. 
+ */ + timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED); } /* @@ -949,6 +954,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest state = HRTIMER_STATE_INACTIVE; __remove_hrtimer(timer, base, state, reprogram); + timer->state &= ~HRTIMER_STATE_PINNED; return 1; } return 0; @@ -1002,6 +1008,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, timer_stats_hrtimer_set_start_info(timer); + /* Update pinned state */ + timer->state &= ~HRTIMER_STATE_PINNED; + timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT; + leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) goto unlock; @@ -1176,8 +1186,8 @@ bool hrtimer_active(const struct hrtimer *timer) cpu_base = READ_ONCE(timer->base->cpu_base); seq = raw_read_seqcount_begin(&cpu_base->seq); - if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + if (((timer->state & ~HRTIMER_STATE_PINNED) != + HRTIMER_STATE_INACTIVE) || cpu_base->running == timer) return true; } while (read_seqcount_retry(&cpu_base->seq, seq) || @@ -1614,13 +1624,17 @@ static void init_hrtimers_cpu(int cpu) hrtimer_init_hres(cpu_base); } -#ifdef CONFIG_HOTPLUG_CPU - +#if defined(CONFIG_HOTPLUG_CPU) static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) + struct hrtimer_clock_base *new_base, + bool remove_pinned) { struct hrtimer *timer; struct timerqueue_node *node; + struct timerqueue_head pinned; + int is_pinned; + + timerqueue_init_head(&pinned); while ((node = timerqueue_getnext(&old_base->active))) { timer = container_of(node, struct hrtimer, node); @@ -1633,6 +1647,13 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * under us on another CPU */ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); + + is_pinned = timer->state & HRTIMER_STATE_PINNED; + if (!remove_pinned && is_pinned) { + timerqueue_add(&pinned, &timer->node); + continue; + } + timer->base = new_base; /* * Enqueue the timers on the new cpu. 
This does not @@ -1644,17 +1665,23 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, */ enqueue_hrtimer(timer, new_base); } + + /* Re-queue pinned timers for non-hotplug usecase */ + while ((node = timerqueue_getnext(&pinned))) { + timer = container_of(node, struct hrtimer, node); + + timerqueue_del(&pinned, &timer->node); + enqueue_hrtimer(timer, old_base); + } } -static void migrate_hrtimers(int scpu) +static void __migrate_hrtimers(int scpu, bool remove_pinned) { struct hrtimer_cpu_base *old_base, *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); - - local_irq_disable(); + local_irq_save(flags); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); /* @@ -1666,7 +1693,7 @@ static void migrate_hrtimers(int scpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i]); + &new_base->clock_base[i], remove_pinned); } raw_spin_unlock(&old_base->lock); @@ -1674,7 +1701,20 @@ static void migrate_hrtimers(int scpu) /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); - local_irq_enable(); + local_irq_restore(flags); +} + +static void migrate_hrtimers(int scpu) +{ + BUG_ON(cpu_online(scpu)); + tick_cancel_sched_timer(scpu); + + __migrate_hrtimers(scpu, true); +} + +void hrtimer_quiesce_cpu(void *cpup) +{ + __migrate_hrtimers(*(int *)cpup, false); } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 51896272fcde..0efb3916f5a4 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1620,56 +1620,86 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -#ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) +#if defined(CONFIG_HOTPLUG_CPU) +static void migrate_timer_list(struct tvec_base *new_base, + struct hlist_head *head, bool remove_pinned) { struct timer_list *timer; int cpu = new_base->cpu; + struct hlist_node *n; + int is_pinned; - while (!hlist_empty(head)) { - timer = hlist_entry(head->first, struct timer_list, entry); - /* We ignore the accounting on the dying cpu */ - detach_timer(timer, false); + hlist_for_each_entry_safe(timer, n, head, entry) { + is_pinned = timer->flags & TIMER_PINNED_ON_CPU; + if (!remove_pinned && is_pinned) + continue; + + detach_if_pending(timer, get_timer_base(timer->flags), false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } -static void migrate_timers(int cpu) +static void __migrate_timers(int cpu, bool wait, bool remove_pinned) { struct tvec_base *old_base; struct tvec_base *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(cpu)); old_base = per_cpu_ptr(&tvec_bases, cpu); new_base = get_cpu_ptr(&tvec_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. 
*/ - spin_lock_irq(&new_base->lock); + spin_lock_irqsave(&new_base->lock, flags); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - BUG_ON(old_base->running_timer); + if (wait) { + /* Ensure timers are done running before continuing */ + while (old_base->running_timer) { + spin_unlock(&old_base->lock); + spin_unlock_irqrestore(&new_base->lock, flags); + cpu_relax(); + spin_lock_irqsave(&new_base->lock, flags); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + } + } else { + BUG_ON(old_base->running_timer); + } for (i = 0; i < TVR_SIZE; i++) - migrate_timer_list(new_base, old_base->tv1.vec + i); + migrate_timer_list(new_base, old_base->tv1.vec + i, + remove_pinned); for (i = 0; i < TVN_SIZE; i++) { - migrate_timer_list(new_base, old_base->tv2.vec + i); - migrate_timer_list(new_base, old_base->tv3.vec + i); - migrate_timer_list(new_base, old_base->tv4.vec + i); - migrate_timer_list(new_base, old_base->tv5.vec + i); + migrate_timer_list(new_base, old_base->tv2.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv3.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv4.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv5.vec + i, + remove_pinned); } - old_base->active_timers = 0; - old_base->all_timers = 0; - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + spin_unlock_irqrestore(&new_base->lock, flags); put_cpu_ptr(&tvec_bases); } +/* Migrate timers from 'cpu' to this_cpu */ +static void migrate_timers(int cpu) +{ + BUG_ON(cpu_online(cpu)); + __migrate_timers(cpu, false, true); +} + +void timer_quiesce_cpu(void *cpup) +{ + __migrate_timers(*(int *)cpup, true, false); +} + static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 9270e1ac6460..49fa2e6eea98 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -15,5 +15,3 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); -EXPORT_TRACEPOINT_SYMBOL(core_ctl_set_busy); -EXPORT_TRACEPOINT_SYMBOL(core_ctl_eval_need); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 029da92fb712..7f21591c8ec5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <linux/cpu.h> +#include <linux/device.h> #include <linux/nmi.h> #include <linux/init.h> #include <linux/module.h> @@ -95,6 +96,7 @@ static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); +static DEFINE_PER_CPU(unsigned int, watchdog_en); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); @@ -586,9 +588,17 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) sched_setscheduler(current, policy, ¶m); } -static void watchdog_enable(unsigned int cpu) +/* Must be called with hotplug lock (lock_device_hotplug()) held. 
*/ +void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + unsigned int *enabled = raw_cpu_ptr(&watchdog_en); + + lock_device_hotplug_assert(); + + if (*enabled) + return; + *enabled = 1; /* kick off the timer for the hardlockup detector */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -606,9 +616,17 @@ static void watchdog_enable(unsigned int cpu) __touch_watchdog(); } -static void watchdog_disable(unsigned int cpu) +/* Must be called with hotplug lock (lock_device_hotplug()) held. */ +void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + unsigned int *enabled = raw_cpu_ptr(&watchdog_en); + + lock_device_hotplug_assert(); + + if (!*enabled) + return; + *enabled = 0; watchdog_set_prio(SCHED_NORMAL, 0); hrtimer_cancel(hrtimer); diff --git a/mm/vmstat.c b/mm/vmstat.c index ca75eeecbad1..77b8eabd5446 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1390,7 +1390,7 @@ static cpumask_var_t cpu_stat_off; static void vmstat_update(struct work_struct *w) { - if (refresh_cpu_vm_stats(true)) { + if (refresh_cpu_vm_stats(true) && !cpu_isolated(smp_processor_id())) { /* * Counters were updated so we expect more updates * to occur in the future. Keep on running the @@ -1402,7 +1402,8 @@ static void vmstat_update(struct work_struct *w) } else { /* * We did not update any counters so the app may be in - * a mode where it does not cause counter updates. + * a mode where it does not cause counter updates or the cpu + * was isolated. * We may be uselessly running vmstat_update. * Defer the checking for differentials to the * shepherd thread on a different processor. @@ -1469,7 +1470,7 @@ static void vmstat_shepherd(struct work_struct *w) get_online_cpus(); /* Check processors whose vmstat worker threads have been disabled */ for_each_cpu(cpu, cpu_stat_off) - if (need_update(cpu) && + if (!cpu_isolated(cpu) && need_update(cpu) && cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) queue_delayed_work_on(cpu, vmstat_wq, |
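To summarize the hrtimer portion of the series: the timer state byte now carries a PINNED bit alongside ENQUEUED, __remove_hrtimer() preserves that bit, hrtimer_active() masks it out, and hrtimer_quiesce_cpu() migrates only the unpinned timers. Below is a small standalone model of that bookkeeping; all macro and function names are invented for the example, and it glosses over locking and the per-clock bases, so it is a sketch of the idea rather than the kernel implementation.

/*
 * Illustrative model, not from the patch: the state byte carries two
 * independent facts -- "enqueued" and "pinned to this CPU" -- and dequeueing
 * must clear only the former so a later quiesce pass can still tell pinned
 * timers apart.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_INACTIVE  0x00
#define STATE_ENQUEUED  0x01
#define PINNED_SHIFT    1
#define STATE_PINNED    (1 << PINNED_SHIFT)

struct model_timer {
        unsigned char state;
};

/* Mirrors the start/enqueue path: record pinned-ness, then OR in ENQUEUED. */
static void model_enqueue(struct model_timer *t, bool pinned)
{
        t->state &= ~STATE_PINNED;
        t->state |= (unsigned char)(!!pinned) << PINNED_SHIFT;
        t->state |= STATE_ENQUEUED;
}

/* Mirrors __remove_hrtimer(): take the new state but preserve PINNED. */
static void model_remove(struct model_timer *t, unsigned char newstate)
{
        t->state = newstate | (t->state & STATE_PINNED);
}

/* Mirrors the activity test: ignore the PINNED bit when checking. */
static bool model_active(const struct model_timer *t)
{
        return (t->state & ~STATE_PINNED) != STATE_INACTIVE;
}

int main(void)
{
        struct model_timer t = { .state = STATE_INACTIVE };

        model_enqueue(&t, true);
        assert(model_active(&t));

        model_remove(&t, STATE_INACTIVE);
        assert(!model_active(&t));       /* no longer counted as active ...   */
        assert(t.state & STATE_PINNED);  /* ... yet still known to be pinned  */

        printf("final state: 0x%02x\n", t.state);
        return 0;
}

Running this prints final state: 0x02: the timer is inactive for all activity checks, but its pinned-ness survives removal, which is exactly what lets the non-hotplug quiesce path in the patch requeue pinned timers back onto the source CPU while migrating everything else.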
