34 files changed, 838 insertions, 213 deletions
diff --git a/Documentation/devicetree/bindings/scheduler/sched_hmp.txt b/Documentation/devicetree/bindings/scheduler/sched_hmp.txt new file mode 100644 index 000000000000..ba1d4db9e407 --- /dev/null +++ b/Documentation/devicetree/bindings/scheduler/sched_hmp.txt @@ -0,0 +1,35 @@ +* HMP scheduler + +This file describes the bindings for an optional HMP scheduler +node (/sched-hmp). + +Required properties: + +Optional properties: + +- boost-policy: The HMP scheduler has two types of task placement boost +policies. + +(1) boost-on-big policy makes use of all big CPUs up to their full capacity +before using the little CPUs. This improves performance on true b.L systems +where the big CPUs have higher efficiency compared to the little CPUs. + +(2) boost-on-all policy places the tasks on the CPU having the highest +spare capacity. This policy is optimal for SMP-like systems. + +The scheduler sets the boost policy to boost-on-big on systems which have +CPUs of different efficiencies. However, it is possible for CPUs of the +same micro-architecture to have slight differences in efficiency due to +other factors like cache size. Selecting the boost-on-big policy based +on the relative difference in efficiency is not optimal on such systems. +The boost-policy device tree property is introduced to specify the +required boost type, and it overrides the default selection of boost +type in the scheduler. + +The possible values for this property are "boost-on-big" and "boost-on-all". + +Example: + +sched-hmp { + boost-policy = "boost-on-all"; +}; diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt index b400e053e55d..298064bc44d7 100644 --- a/Documentation/scheduler/sched-hmp.txt +++ b/Documentation/scheduler/sched-hmp.txt @@ -43,6 +43,7 @@ CONTENTS 8.8 sched_get_busy 8.9 sched_freq_alert 8.10 sched_set_boost +9. Device Tree bindings =============== 1. INTRODUCTION @@ -1447,3 +1448,10 @@ Logged when boost settings are being changed <task>-0 [004] d.h4 12700.711489: sched_set_boost: ref_count=1 - ref_count: A non-zero value indicates boost is in effect + +======================== +9. Device Tree bindings +======================== + +The device tree bindings for the HMP scheduler are defined in +Documentation/devicetree/bindings/scheduler/sched_hmp.txt diff --git a/arch/arm/boot/dts/qcom/msmcobalt-v2.dtsi b/arch/arm/boot/dts/qcom/msmcobalt-v2.dtsi index e19fb5f7c233..25fd35c1bd10 100644 --- a/arch/arm/boot/dts/qcom/msmcobalt-v2.dtsi +++ b/arch/arm/boot/dts/qcom/msmcobalt-v2.dtsi @@ -664,11 +664,11 @@ /* SVS2 */ qcom,imem-ab-tbl = - <200000000 1752000>, - <269330000 1752000>, - <355200000 2500000>, - <444000000 6000000>, - <533000000 6000000>; + <200000000 1560000>,/* imem @ svs2 freq 75 Mhz */ + <269330000 3570000>,/* imem @ svs freq 171 Mhz */ + <355200000 3570000>,/* imem @ svs freq 171 Mhz */ + <444000000 6750000>,/* imem @ nom freq 323 Mhz */ + <533000000 8490000>;/* imem @ turbo freq 406 Mhz */ }; /* GPU overrides */ diff --git a/arch/arm/boot/dts/qcom/msmcobalt-vidc.dtsi b/arch/arm/boot/dts/qcom/msmcobalt-vidc.dtsi index c44e8f976710..f17be7570742 100644 --- a/arch/arm/boot/dts/qcom/msmcobalt-vidc.dtsi +++ b/arch/arm/boot/dts/qcom/msmcobalt-vidc.dtsi @@ -66,10 +66,10 @@ * corresponding video core frequency.
*/ qcom,imem-ab-tbl = - <100000000 1752000>, /* imem @ svs2 freq 75 Mhz */ - <186000000 1752000>, /* imem @ svs2 freq 75 Mhz */ - <360000000 2500000>, /* imem @ svs freq 171 Mhz */ - <465000000 6000000>; /* imem @ noimal freq 320 Mhz */ + <100000000 1560000>, /* imem @ svs2 freq 75 Mhz */ + <186000000 3570000>, /* imem @ svs freq 171 Mhz */ + <360000000 6750000>, /* imem @ nom freq 323 Mhz */ + <465000000 8490000>; /* imem @ turbo freq 406 Mhz */ /* Regulators */ smmu-vdd-supply = <&gdsc_bimc_smmu>; @@ -124,7 +124,7 @@ qcom,bus-master = <MSM_BUS_MASTER_VIDEO_P0_OCMEM>; qcom,bus-slave = <MSM_BUS_SLAVE_VMEM>; qcom,bus-governor = "msm-vidc-vmem+"; - qcom,bus-range-kbps = <1000 6776000>; + qcom,bus-range-kbps = <1000 8490000>; }; arm9_bus_ddr { diff --git a/arch/arm/boot/dts/qcom/msmfalcon-coresight.dtsi b/arch/arm/boot/dts/qcom/msmfalcon-coresight.dtsi index 352856965373..b60d4013dad8 100644 --- a/arch/arm/boot/dts/qcom/msmfalcon-coresight.dtsi +++ b/arch/arm/boot/dts/qcom/msmfalcon-coresight.dtsi @@ -26,6 +26,8 @@ arm,buffer-size = <0x400000>; arm,sg-enable; + coresight-ctis = <&cti0 &cti8>; + coresight-name = "coresight-tmc-etr"; clocks = <&clock_gcc RPM_QDSS_CLK>, @@ -76,6 +78,8 @@ coresight-name = "coresight-tmc-etf"; + coresight-ctis = <&cti0 &cti8>; + clocks = <&clock_gcc RPM_QDSS_CLK>, <&clock_gcc RPM_QDSS_A_CLK>; clock-names = "apb_pclk", "core_a_clk"; @@ -161,6 +165,14 @@ <&funnel_merg_in_funnel_in0>; }; }; + port@3 { + reg = <6>; + funnel_in0_in_funnel_qatb: endpoint { + slave-mode; + remote-endpoint = + <&funnel_qatb_out_funnel_in0>; + }; + }; port@4 { reg = <7>; funnel_in0_in_stm: endpoint { @@ -191,4 +203,294 @@ }; }; }; + + cti0: cti@6010000 { + compatible = "arm,coresight-cti"; + reg = <0x6010000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti0"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti1: cti@6011000 { + compatible = "arm,coresight-cti"; + reg = <0x6011000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti1"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti2: cti@6012000 { + compatible = "arm,coresight-cti"; + reg = <0x6012000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti2"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti3: cti@6013000 { + compatible = "arm,coresight-cti"; + reg = <0x6013000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti3"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti4: cti@6014000 { + compatible = "arm,coresight-cti"; + reg = <0x6014000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti4"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti5: cti@6015000 { + compatible = "arm,coresight-cti"; + reg = <0x6015000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti5"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti6: cti@6016000 { + compatible = "arm,coresight-cti"; + reg = <0x6016000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti6"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + 
}; + + cti7: cti@6017000 { + compatible = "arm,coresight-cti"; + reg = <0x6017000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti7"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti8: cti@6018000 { + compatible = "arm,coresight-cti"; + reg = <0x6018000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti8"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti9: cti@6019000 { + compatible = "arm,coresight-cti"; + reg = <0x6019000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti9"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti10: cti@601a000 { + compatible = "arm,coresight-cti"; + reg = <0x601a000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti10"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti11: cti@601b000 { + compatible = "arm,coresight-cti"; + reg = <0x601b000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti11"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti12: cti@601c000 { + compatible = "arm,coresight-cti"; + reg = <0x601c000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti12"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti13: cti@601d000 { + compatible = "arm,coresight-cti"; + reg = <0x601d000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti13"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti14: cti@601e000 { + compatible = "arm,coresight-cti"; + reg = <0x601e000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti14"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + cti15: cti@601f000 { + compatible = "arm,coresight-cti"; + reg = <0x601f000 0x1000>; + reg-names = "cti-base"; + + coresight-name = "coresight-cti15"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + }; + + funnel_qatb: funnel@6005000 { + compatible = "arm,primecell"; + arm,primecell-periphid = <0x0003b908>; + + reg = <0x6005000 0x1000>; + reg-names = "funnel-base"; + + coresight-name = "coresight-funnel-qatb"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "apb_pclk", "core_a_clk"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + funnel_qatb_out_funnel_in0: endpoint { + remote-endpoint = + <&funnel_in0_in_funnel_qatb>; + }; + }; + port@1 { + reg = <0>; + funnel_qatb_in_tpda: endpoint { + slave-mode; + remote-endpoint = + <&tpda_out_funnel_qatb>; + }; + }; + }; + }; + + tpda: tpda@6004000 { + compatible = "qcom,coresight-tpda"; + reg = <0x6004000 0x1000>; + reg-names = "tpda-base"; + + coresight-name = "coresight-tpda"; + + qcom,tpda-atid = <65>; + qcom,bc-elem-size = <7 32>, + <9 32>; + qcom,tc-elem-size = <3 32>, + <6 32>, + <9 32>; + qcom,dsb-elem-size = <7 32>, + <9 32>; + qcom,cmb-elem-size = <3 32>, + <4 32>, + <5 32>, + <9 64>; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + 
clock-names = "core_clk", "core_a_clk"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + tpda_out_funnel_qatb: endpoint { + remote-endpoint = + <&funnel_qatb_in_tpda>; + }; + }; + port@2 { + reg = <5>; + tpda_in_tpdm_dcc: endpoint { + slave-mode; + remote-endpoint = + <&tpdm_dcc_out_tpda>; + }; + }; + }; + }; + + tpdm_dcc: tpdm@7054000 { + compatible = "qcom,coresight-tpdm"; + reg = <0x7054000 0x1000>; + reg-names = "tpdm-base"; + + coresight-name = "coresight-tpdm-dcc"; + + clocks = <&clock_gcc RPM_QDSS_CLK>, + <&clock_gcc RPM_QDSS_A_CLK>; + clock-names = "core_clk", "core_a_clk"; + + port{ + tpdm_dcc_out_tpda: endpoint { + remote-endpoint = <&tpda_in_tpdm_dcc>; + }; + }; + }; }; diff --git a/arch/arm/boot/dts/qcom/msmfalcon.dtsi b/arch/arm/boot/dts/qcom/msmfalcon.dtsi index 4765a8cb7c79..67748d6683c0 100644 --- a/arch/arm/boot/dts/qcom/msmfalcon.dtsi +++ b/arch/arm/boot/dts/qcom/msmfalcon.dtsi @@ -252,6 +252,18 @@ status = "ok"; }; + wdog: qcom,wdt@17817000 { + status = "disabled"; + compatible = "qcom,msm-watchdog"; + reg = <0x17817000 0x1000>; + reg-names = "wdt-base"; + interrupts = <0 3 0>, <0 4 0>; + qcom,bark-time = <11000>; + qcom,pet-time = <10000>; + qcom,ipi-ping; + qcom,wakeup-enable; + }; + qcom,sps { compatible = "qcom,msm_sps_4k"; qcom,pipe-attr-ee; @@ -380,6 +392,16 @@ qcom,mpu-enabled; }; + dcc: dcc@10b3000 { + compatible = "qcom,dcc"; + reg = <0x10b3000 0x1000>, + <0x10b4000 0x800>; + reg-names = "dcc-base", "dcc-ram-base"; + + clocks = <&clock_gcc RPM_QDSS_CLK>; + clock-names = "dcc_clk"; + }; + qcom,glink-smem-native-xprt-modem@86000000 { compatible = "qcom,glink-smem-native-xprt"; reg = <0x86000000 0x200000>, diff --git a/arch/arm/configs/msmfalcon_defconfig b/arch/arm/configs/msmfalcon_defconfig index 2b35b0f12787..103b90c7d6fc 100644 --- a/arch/arm/configs/msmfalcon_defconfig +++ b/arch/arm/configs/msmfalcon_defconfig @@ -416,6 +416,9 @@ CONFIG_GPIO_USB_DETECT=y CONFIG_USB_BAM=y CONFIG_REMOTE_SPINLOCK_MSM=y CONFIG_ARM_SMMU=y +CONFIG_IOMMU_DEBUG=y +CONFIG_IOMMU_DEBUG_TRACKING=y +CONFIG_IOMMU_TESTS=y CONFIG_QCOM_COMMON_LOG=y CONFIG_MSM_SMEM=y CONFIG_QPNP_HAPTIC=y diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 598323a1842e..e683d147816c 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -190,6 +190,13 @@ static int __init parse_cluster(struct device_node *cluster, int depth) return 0; } +static DEFINE_PER_CPU(unsigned long, cpu_efficiency) = SCHED_CAPACITY_SCALE; + +unsigned long arch_get_cpu_efficiency(int cpu) +{ + return per_cpu(cpu_efficiency, cpu); +} + #ifdef CONFIG_OF struct cpu_efficiency { const char *compatible; @@ -266,6 +273,7 @@ static int __init parse_dt_topology(void) for_each_possible_cpu(cpu) { const u32 *rate; int len; + u32 efficiency; /* too early to use cpu->of_node */ cn = of_get_cpu_node(cpu, NULL); @@ -274,12 +282,26 @@ static int __init parse_dt_topology(void) continue; } - for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) - if (of_device_is_compatible(cn, cpu_eff->compatible)) - break; + /* + * The CPU efficiency value passed from the device tree + * overrides the value defined in the table_efficiency[] + */ + if (of_property_read_u32(cn, "efficiency", &efficiency) < 0) { + + for (cpu_eff = table_efficiency; + cpu_eff->compatible; cpu_eff++) - if (cpu_eff->compatible == NULL) - continue; + if (of_device_is_compatible(cn, + cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + efficiency = cpu_eff->efficiency; + } 
+ + per_cpu(cpu_efficiency, cpu) = efficiency; rate = of_get_property(cn, "clock-frequency", &len); if (!rate || len != 4) { @@ -288,7 +310,7 @@ static int __init parse_dt_topology(void) continue; } - capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + capacity = ((be32_to_cpup(rate)) >> 20) * efficiency; /* Save min capacity of the system */ if (capacity < min_capacity) diff --git a/drivers/clk/msm/clock-osm.c b/drivers/clk/msm/clock-osm.c index 68274765a6c1..0d733f49f184 100644 --- a/drivers/clk/msm/clock-osm.c +++ b/drivers/clk/msm/clock-osm.c @@ -460,7 +460,7 @@ static int clk_osm_set_rate(struct clk *c, unsigned long rate) } pr_debug("rate: %lu --> index %d\n", rate, index); - if (cpuclk->llm_sw_overr) { + if (cpuclk->llm_sw_overr[0]) { clk_osm_write_reg(cpuclk, cpuclk->llm_sw_overr[0], LLM_SW_OVERRIDE_REG); clk_osm_write_reg(cpuclk, cpuclk->llm_sw_overr[1], @@ -471,7 +471,7 @@ static int clk_osm_set_rate(struct clk *c, unsigned long rate) /* Choose index and send request to OSM hardware */ clk_osm_write_reg(cpuclk, index, DCVS_PERF_STATE_DESIRED_REG); - if (cpuclk->llm_sw_overr) { + if (cpuclk->llm_sw_overr[0]) { udelay(1); clk_osm_write_reg(cpuclk, cpuclk->llm_sw_overr[2], LLM_SW_OVERRIDE_REG); @@ -1882,8 +1882,8 @@ static void clk_osm_apm_vc_setup(struct clk_osm *c) } scm_io_write(c->pbases[OSM_BASE] + SEQ_REG(72), c->apm_crossover_vc); - clk_osm_write_reg(c, c->apm_threshold_vc, - SEQ_REG(15)); + scm_io_write(c->pbases[OSM_BASE] + SEQ_REG(15), + c->apm_threshold_vc); scm_io_write(c->pbases[OSM_BASE] + SEQ_REG(31), c->apm_threshold_vc != 0 ? c->apm_threshold_vc - 1 : 0xff); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index 1f2178848664..0fcc0c3b0d49 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -1913,20 +1913,38 @@ void kgsl_idle_check(struct work_struct *work) { struct kgsl_device *device = container_of(work, struct kgsl_device, idle_check_ws); + int ret = 0; + unsigned int requested_state; + WARN_ON(device == NULL); if (device == NULL) return; mutex_lock(&device->mutex); + requested_state = device->requested_state; + if (device->state == KGSL_STATE_ACTIVE || device->state == KGSL_STATE_NAP) { - if (!atomic_read(&device->active_cnt)) - kgsl_pwrctrl_change_state(device, + if (!atomic_read(&device->active_cnt)) { + ret = kgsl_pwrctrl_change_state(device, device->requested_state); + if (ret == -EBUSY) { + /* + * If the GPU is currently busy, restore + * the requested state and reschedule + * idle work. 
+ */ + kgsl_pwrctrl_request_state(device, + requested_state); + kgsl_schedule_work(&device->idle_check_ws); + } + } + + if (!ret) + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); - kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); if (device->state == KGSL_STATE_ACTIVE) mod_timer(&device->idle_timer, jiffies + diff --git a/drivers/hwtracing/coresight/coresight-csr.c b/drivers/hwtracing/coresight/coresight-csr.c index dfb2922b6f33..3c18d686091a 100644 --- a/drivers/hwtracing/coresight/coresight-csr.c +++ b/drivers/hwtracing/coresight/coresight-csr.c @@ -191,8 +191,6 @@ static int csr_probe(struct platform_device *pdev) drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - /* Store the driver data pointer for use in exported functions */ - csrdrvdata = drvdata; drvdata->dev = &pdev->dev; platform_set_drvdata(pdev, drvdata); @@ -220,6 +218,8 @@ static int csr_probe(struct platform_device *pdev) if (IS_ERR(drvdata->csdev)) return PTR_ERR(drvdata->csdev); + /* Store the driver data pointer for use in exported functions */ + csrdrvdata = drvdata; dev_info(dev, "CSR initialized\n"); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index a234d61802ce..fb2f27299417 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -774,8 +774,6 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - /* Store the driver data pointer for use in exported functions */ - stmdrvdata = drvdata; drvdata->dev = &adev->dev; dev_set_drvdata(dev, drvdata); @@ -846,6 +844,8 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) if (boot_enable) coresight_enable(drvdata->csdev); + /* Store the driver data pointer for use in exported functions */ + stmdrvdata = drvdata; return 0; err: coresight_unregister(drvdata->csdev); diff --git a/drivers/input/touchscreen/synaptics_dsx/synaptics_dsx_rmi_dev.c b/drivers/input/touchscreen/synaptics_dsx/synaptics_dsx_rmi_dev.c index 4c341ffb6094..9d61eb110e2f 100644 --- a/drivers/input/touchscreen/synaptics_dsx/synaptics_dsx_rmi_dev.c +++ b/drivers/input/touchscreen/synaptics_dsx/synaptics_dsx_rmi_dev.c @@ -347,7 +347,7 @@ static ssize_t rmidev_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { ssize_t retval; - unsigned char tmpbuf[count + 1]; + unsigned char *tmpbuf; struct rmidev_data *dev_data = filp->private_data; if (IS_ERR(dev_data)) { @@ -361,6 +361,10 @@ static ssize_t rmidev_read(struct file *filp, char __user *buf, if (count > (REG_ADDR_LIMIT - *f_pos)) count = REG_ADDR_LIMIT - *f_pos; + tmpbuf = kzalloc(count + 1, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + mutex_lock(&(dev_data->file_mutex)); retval = synaptics_rmi4_reg_read(rmidev->rmi4_data, @@ -377,7 +381,7 @@ static ssize_t rmidev_read(struct file *filp, char __user *buf, clean_up: mutex_unlock(&(dev_data->file_mutex)); - + kfree(tmpbuf); return retval; } @@ -393,7 +397,7 @@ static ssize_t rmidev_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { ssize_t retval; - unsigned char tmpbuf[count + 1]; + unsigned char *tmpbuf; struct rmidev_data *dev_data = filp->private_data; if (IS_ERR(dev_data)) { @@ -407,9 +411,14 @@ static ssize_t rmidev_write(struct file *filp, const char __user *buf, if (count > (REG_ADDR_LIMIT - *f_pos)) count = REG_ADDR_LIMIT - *f_pos; - if 
(copy_from_user(tmpbuf, buf, count)) - return -EFAULT; + tmpbuf = kzalloc(count + 1, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + if (copy_from_user(tmpbuf, buf, count)) { + kfree(tmpbuf); + return -EFAULT; + } mutex_lock(&(dev_data->file_mutex)); retval = synaptics_rmi4_reg_write(rmidev->rmi4_data, @@ -420,7 +429,7 @@ static ssize_t rmidev_write(struct file *filp, const char __user *buf, *f_pos += retval; mutex_unlock(&(dev_data->file_mutex)); - + kfree(tmpbuf); return retval; } diff --git a/drivers/media/platform/msm/camera_v2/sensor/actuator/msm_actuator.c b/drivers/media/platform/msm/camera_v2/sensor/actuator/msm_actuator.c index 0b3e4e1fcf04..bf3973888573 100644 --- a/drivers/media/platform/msm/camera_v2/sensor/actuator/msm_actuator.c +++ b/drivers/media/platform/msm/camera_v2/sensor/actuator/msm_actuator.c @@ -101,11 +101,6 @@ static void msm_actuator_parse_i2c_params(struct msm_actuator_ctrl_t *a_ctrl, i2c_tbl = a_ctrl->i2c_reg_tbl; for (i = 0; i < size; i++) { - /* check that the index into i2c_tbl cannot grow larger that - the allocated size of i2c_tbl */ - if ((a_ctrl->total_steps + 1) < (a_ctrl->i2c_tbl_index)) - break; - if (write_arr[i].reg_write_type == MSM_ACTUATOR_WRITE_DAC) { value = (next_lens_position << write_arr[i].data_shift) | @@ -119,6 +114,11 @@ static void msm_actuator_parse_i2c_params(struct msm_actuator_ctrl_t *a_ctrl, i2c_byte2 = value & 0xFF; CDBG("byte1:0x%x, byte2:0x%x\n", i2c_byte1, i2c_byte2); + if (a_ctrl->i2c_tbl_index > + a_ctrl->total_steps) { + pr_err("failed:i2c table index out of bound\n"); + break; + } i2c_tbl[a_ctrl->i2c_tbl_index]. reg_addr = i2c_byte1; i2c_tbl[a_ctrl->i2c_tbl_index]. @@ -139,6 +139,10 @@ static void msm_actuator_parse_i2c_params(struct msm_actuator_ctrl_t *a_ctrl, i2c_byte2 = (hw_dword & write_arr[i].hw_mask) >> write_arr[i].hw_shift; } + if (a_ctrl->i2c_tbl_index > a_ctrl->total_steps) { + pr_err("failed: i2c table index out of bound\n"); + break; + } CDBG("i2c_byte1:0x%x, i2c_byte2:0x%x\n", i2c_byte1, i2c_byte2); i2c_tbl[a_ctrl->i2c_tbl_index].reg_addr = i2c_byte1; i2c_tbl[a_ctrl->i2c_tbl_index].reg_data = i2c_byte2; diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index ce115c98a72f..d3e420f1b26b 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1583,6 +1583,32 @@ static const struct file_operations fops_fw_capabilities = { .llseek = seq_lseek, }; +/*---------FW version------------*/ +static int wil_fw_version_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + if (wil->fw_version[0]) + seq_printf(s, "%s\n", wil->fw_version); + else + seq_puts(s, "N/A\n"); + + return 0; +} + +static int wil_fw_version_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_version_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_version = { + .open = wil_fw_version_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1634,6 +1660,7 @@ static const struct { {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, + {"fw_version", S_IRUGO, &fops_fw_version}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, @@ -1674,7 +1701,6 @@ static void 
wil6210_debugfs_init_isr(struct wil6210_priv *wil, static const struct dbg_off dbg_wil_off[] = { WIL_FIELD(privacy, S_IRUGO, doff_u32), WIL_FIELD(status[0], S_IRUGO | S_IWUSR, doff_ulong), - WIL_FIELD(fw_version, S_IRUGO, doff_u32), WIL_FIELD(hw_version, S_IRUGO, doff_x32), WIL_FIELD(recovery_count, S_IRUGO, doff_u32), WIL_FIELD(ap_isolate, S_IRUGO, doff_u32), diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index c3191c61832c..2f2b910501ba 100644 --- a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -102,6 +102,9 @@ struct wil_fw_record_verify { /* type == wil_fw_verify */ /* file header * First record of every file */ +/* the FW version prefix in the comment */ +#define WIL_FW_VERSION_PREFIX "FW version: " +#define WIL_FW_VERSION_PREFIX_LEN (sizeof(WIL_FW_VERSION_PREFIX) - 1) struct wil_fw_record_file_header { __le32 signature ; /* Wilocity signature */ __le32 reserved; diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index 3860238840ba..8f40eb301924 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -223,6 +223,13 @@ static int fw_handle_file_header(struct wil6210_priv *wil, const void *data, wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1, d->comment, sizeof(d->comment), true); + if (!memcmp(d->comment, WIL_FW_VERSION_PREFIX, + WIL_FW_VERSION_PREFIX_LEN)) + memcpy(wil->fw_version, + d->comment + WIL_FW_VERSION_PREFIX_LEN, + min(sizeof(d->comment) - WIL_FW_VERSION_PREFIX_LEN, + sizeof(wil->fw_version) - 1)); + return 0; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 88dfb93d0f3d..a509841c3187 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -892,6 +892,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) WIL_FW2_NAME); wil_halt_cpu(wil); + memset(wil->fw_version, 0, sizeof(wil->fw_version)); /* Loading f/w from the file */ rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 81a4c6a684f5..f4fca9d4eedf 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -222,6 +222,8 @@ int wil_if_add(struct wil6210_priv *wil) wil_dbg_misc(wil, "entered"); + strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version)); + rc = wiphy_register(wiphy); if (rc < 0) { wil_err(wil, "failed to register wiphy, err %d\n", rc); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 1e0536adb6e7..ce33e919d321 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -17,6 +17,7 @@ #ifndef __WIL6210_H__ #define __WIL6210_H__ +#include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/wireless.h> #include <net/cfg80211.h> @@ -579,7 +580,7 @@ struct wil6210_priv { struct wireless_dev *wdev; void __iomem *csr; DECLARE_BITMAP(status, wil_status_last); - u32 fw_version; + u8 fw_version[ETHTOOL_FWVERS_LEN]; u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 0c5db9584159..6ec3ddc5b6f1 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -312,14 +312,14 @@ static 
void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len) struct wireless_dev *wdev = wil->wdev; struct wmi_ready_event *evt = d; - wil->fw_version = le32_to_cpu(evt->sw_version); wil->n_mids = evt->numof_additional_mids; - wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version, + wil_info(wil, "FW ver. %s(SW %d); MAC %pM; %d MID's\n", + wil->fw_version, le32_to_cpu(evt->sw_version), evt->mac, wil->n_mids); /* ignore MAC address, we already have it from the boot loader */ - snprintf(wdev->wiphy->fw_version, sizeof(wdev->wiphy->fw_version), - "%d", wil->fw_version); + strlcpy(wdev->wiphy->fw_version, wil->fw_version, + sizeof(wdev->wiphy->fw_version)); wil_set_recovery_state(wil, fw_recovery_idle); set_bit(wil_status_fwready, wil->status); diff --git a/drivers/power/qcom-charger/battery_current_limit.c b/drivers/power/qcom-charger/battery_current_limit.c index 2bda5ce1a8c4..951d0544efa0 100644 --- a/drivers/power/qcom-charger/battery_current_limit.c +++ b/drivers/power/qcom-charger/battery_current_limit.c @@ -174,6 +174,9 @@ struct bcl_context { struct qpnp_adc_tm_btm_param btm_vph_adc_param; /* Low temp min freq limit requested by thermal */ uint32_t thermal_freq_limit; + /* state of charge notifier */ + struct notifier_block psy_nb; + struct work_struct soc_mitig_work; /* BCL Peripheral monitor parameters */ struct bcl_threshold ibat_high_thresh; @@ -204,8 +207,6 @@ static DEFINE_MUTEX(bcl_hotplug_mutex); static bool bcl_hotplug_enabled; static uint32_t battery_soc_val = 100; static uint32_t soc_low_threshold; -static struct power_supply *bcl_psy; -static struct power_supply_desc bcl_psy_des; static const char bcl_psy_name[] = "bcl"; static void bcl_handle_hotplug(struct work_struct *work) @@ -277,22 +278,34 @@ static void update_cpu_freq(void) trace_bcl_sw_mitigation_event("End Frequency Mitigation"); } -static void power_supply_callback(struct power_supply *psy) +static void soc_mitigate(struct work_struct *work) { - static struct power_supply *bms_psy; + if (bcl_hotplug_enabled) + queue_work(gbcl->bcl_hotplug_wq, &bcl_hotplug_work); + update_cpu_freq(); +} + +static int power_supply_callback(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct power_supply *psy = data; + static struct power_supply *batt_psy; union power_supply_propval ret = {0,}; int battery_percentage; enum bcl_threshold_state prev_soc_state; if (gbcl->bcl_mode != BCL_DEVICE_ENABLED) { pr_debug("BCL is not enabled\n"); - return; + return NOTIFY_OK; } - if (!bms_psy) - bms_psy = power_supply_get_by_name("bms"); - if (bms_psy) { - battery_percentage = power_supply_get_property(bms_psy, + if (strcmp(psy->desc->name, "battery")) + return NOTIFY_OK; + + if (!batt_psy) + batt_psy = power_supply_get_by_name("battery"); + if (batt_psy) { + battery_percentage = power_supply_get_property(batt_psy, POWER_SUPPLY_PROP_CAPACITY, &ret); battery_percentage = ret.intval; battery_soc_val = battery_percentage; @@ -302,15 +315,14 @@ static void power_supply_callback(struct power_supply *psy) bcl_soc_state = (battery_soc_val <= soc_low_threshold) ? BCL_LOW_THRESHOLD : BCL_HIGH_THRESHOLD; if (bcl_soc_state == prev_soc_state) - return; + return NOTIFY_OK; trace_bcl_sw_mitigation_event( (bcl_soc_state == BCL_LOW_THRESHOLD) ? 
"trigger SoC mitigation" : "clear SoC mitigation"); - if (bcl_hotplug_enabled) - queue_work(gbcl->bcl_hotplug_wq, &bcl_hotplug_work); - update_cpu_freq(); + schedule_work(&gbcl->soc_mitig_work); } + return NOTIFY_OK; } static int bcl_get_battery_voltage(int *vbatt_mv) @@ -624,7 +636,6 @@ static void bcl_periph_vbat_notify(enum bcl_trip_type type, int trip_temp, static void bcl_periph_mode_set(enum bcl_device_mode mode) { int ret = 0; - struct power_supply_config bcl_psy_cfg = {}; if (mode == BCL_DEVICE_ENABLED) { /* @@ -632,15 +643,11 @@ static void bcl_periph_mode_set(enum bcl_device_mode mode) * power state changes. Make sure we read the current SoC * and mitigate. */ - power_supply_callback(bcl_psy); - bcl_psy_cfg.num_supplicants = 0; - bcl_psy_cfg.drv_data = gbcl; - - bcl_psy = power_supply_register(gbcl->dev, &bcl_psy_des, - &bcl_psy_cfg); - if (IS_ERR(bcl_psy)) { - pr_err("Unable to register bcl_psy rc = %ld\n", - PTR_ERR(bcl_psy)); + power_supply_callback(&gbcl->psy_nb, 1, gbcl); + ret = power_supply_reg_notifier(&gbcl->psy_nb); + if (ret < 0) { + pr_err("Unable to register soc notifier rc = %d\n", + ret); return; } ret = msm_bcl_set_threshold(BCL_PARAM_CURRENT, BCL_HIGH_TRIP, @@ -678,7 +685,7 @@ static void bcl_periph_mode_set(enum bcl_device_mode mode) } gbcl->btm_mode = BCL_VPH_MONITOR_MODE; } else { - power_supply_unregister(bcl_psy); + power_supply_unreg_notifier(&gbcl->psy_nb); ret = msm_bcl_disable(); if (ret) { pr_err("Error disabling BCL\n"); @@ -1627,19 +1634,6 @@ btm_probe_exit: return ret; } -static int bcl_battery_get_property(struct power_supply *psy, - enum power_supply_property prop, - union power_supply_propval *val) -{ - return 0; -} -static int bcl_battery_set_property(struct power_supply *psy, - enum power_supply_property prop, - const union power_supply_propval *val) -{ - return 0; -} - static uint32_t get_mask_from_core_handle(struct platform_device *pdev, const char *key) { @@ -1725,12 +1719,8 @@ static int bcl_probe(struct platform_device *pdev) pr_err("Cannot create bcl sysfs\n"); return ret; } - bcl_psy_des.name = bcl_psy_name; - bcl_psy_des.type = POWER_SUPPLY_TYPE_BMS; - bcl_psy_des.get_property = bcl_battery_get_property; - bcl_psy_des.set_property = bcl_battery_set_property; - bcl_psy_des.num_properties = 0; - bcl_psy_des.external_power_changed = power_supply_callback; + INIT_WORK(&bcl->soc_mitig_work, soc_mitigate); + bcl->psy_nb.notifier_call = power_supply_callback; bcl->bcl_hotplug_wq = alloc_workqueue("bcl_hotplug_wq", WQ_HIGHPRI, 0); if (!bcl->bcl_hotplug_wq) { pr_err("Workqueue alloc failed\n"); @@ -1773,6 +1763,7 @@ static int bcl_remove(struct platform_device *pdev) int cpu; /* De-register KTM handle */ + power_supply_unreg_notifier(&gbcl->psy_nb); if (gbcl->hotplug_handle) devmgr_unregister_mitigation_client(&pdev->dev, gbcl->hotplug_handle); diff --git a/drivers/soc/qcom/jtag-fuse.c b/drivers/soc/qcom/jtag-fuse.c index 0b05ce9a22bb..0f347723e378 100644 --- a/drivers/soc/qcom/jtag-fuse.c +++ b/drivers/soc/qcom/jtag-fuse.c @@ -152,8 +152,6 @@ static int jtag_fuse_probe(struct platform_device *pdev) drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - /* Store the driver data pointer for use in exported functions */ - fusedrvdata = drvdata; drvdata->dev = &pdev->dev; platform_set_drvdata(pdev, drvdata); @@ -174,6 +172,8 @@ static int jtag_fuse_probe(struct platform_device *pdev) if (!drvdata->base) return -ENOMEM; + /* Store the driver data pointer for use in exported functions */ + fusedrvdata = drvdata; 
dev_info(dev, "JTag Fuse initialized\n"); return 0; } diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index c2fa3ecb0dce..146efefde2a1 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -10,30 +10,96 @@ struct percpu_rw_semaphore { struct rcu_sync rss; - unsigned int __percpu *fast_read_ctr; + unsigned int __percpu *read_count; struct rw_semaphore rw_sem; - atomic_t slow_read_ctr; - wait_queue_head_t write_waitq; + wait_queue_head_t writer; + int readers_block; }; -extern void percpu_down_read(struct percpu_rw_semaphore *); -extern int percpu_down_read_trylock(struct percpu_rw_semaphore *); -extern void percpu_up_read(struct percpu_rw_semaphore *); +extern int __percpu_down_read(struct percpu_rw_semaphore *, int); +extern void __percpu_up_read(struct percpu_rw_semaphore *); + +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + might_sleep(); + + rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); + + preempt_disable(); + /* + * We are in an RCU-sched read-side critical section, so the writer + * cannot both change sem->state from readers_fast and start checking + * counters while we are here. So if we see !sem->state, we know that + * the writer won't be checking until we're past the preempt_enable() + * and that one the synchronize_sched() is done, the writer will see + * anything we did within this RCU-sched read-size critical section. + */ + __this_cpu_inc(*sem->read_count); + if (unlikely(!rcu_sync_is_idle(&sem->rss))) + __percpu_down_read(sem, false); /* Unconditional memory barrier */ + preempt_enable(); + /* + * The barrier() from preempt_enable() prevents the compiler from + * bleeding the critical section out. + */ +} + +static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +{ + int ret = 1; + + preempt_disable(); + /* + * Same as in percpu_down_read(). + */ + __this_cpu_inc(*sem->read_count); + if (unlikely(!rcu_sync_is_idle(&sem->rss))) + ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + preempt_enable(); + /* + * The barrier() from preempt_enable() prevents the compiler from + * bleeding the critical section out. + */ + + if (ret) + rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_); + + return ret; +} + +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +{ + /* + * The barrier() in preempt_disable() prevents the compiler from + * bleeding the critical section out. + */ + preempt_disable(); + /* + * Same as in percpu_down_read(). 
+ */ + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_dec(*sem->read_count); + else + __percpu_up_read(sem); /* Unconditional memory barrier */ + preempt_enable(); + + rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); +} extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); + extern void percpu_free_rwsem(struct percpu_rw_semaphore *); -#define percpu_init_rwsem(brw) \ +#define percpu_init_rwsem(sem) \ ({ \ static struct lock_class_key rwsem_key; \ - __percpu_init_rwsem(brw, #brw, &rwsem_key); \ + __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) - #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index a63a33e6196e..ece7ed9a4a70 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -59,6 +59,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ad4a12371069..cc6c7d0a6758 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5376,6 +5376,12 @@ int __init cgroup_init(void) BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); + /* + * The latency of the synchronize_sched() is too high for cgroups, + * avoid it at the cost of forcing all readers into the slow path. + */ + rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); + mutex_lock(&cgroup_mutex); /* Add init_css_set to the hash table */ diff --git a/kernel/fork.c b/kernel/fork.c index c9eb86b646ab..e89d0bae6f20 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1375,7 +1375,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; - threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1527,6 +1526,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. 
It should be noted that the new process's css_set can be changed @@ -1627,6 +1627,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, bad_fork_cancel_cgroup: cgroup_cancel_fork(p, cgrp_ss_priv); bad_fork_free_pid: + threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: @@ -1657,7 +1658,6 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif - threadgroup_change_end(current); delayacct_tsk_free(p); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index f231e0bb311c..ce182599cf2e 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -8,151 +8,186 @@ #include <linux/sched.h> #include <linux/errno.h> -int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, +int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, const char *name, struct lock_class_key *rwsem_key) { - brw->fast_read_ctr = alloc_percpu(int); - if (unlikely(!brw->fast_read_ctr)) + sem->read_count = alloc_percpu(int); + if (unlikely(!sem->read_count)) return -ENOMEM; /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - __init_rwsem(&brw->rw_sem, name, rwsem_key); - rcu_sync_init(&brw->rss, RCU_SCHED_SYNC); - atomic_set(&brw->slow_read_ctr, 0); - init_waitqueue_head(&brw->write_waitq); + rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); + __init_rwsem(&sem->rw_sem, name, rwsem_key); + init_waitqueue_head(&sem->writer); + sem->readers_block = 0; return 0; } EXPORT_SYMBOL_GPL(__percpu_init_rwsem); -void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +void percpu_free_rwsem(struct percpu_rw_semaphore *sem) { /* * XXX: temporary kludge. The error path in alloc_super() * assumes that percpu_free_rwsem() is safe after kzalloc(). */ - if (!brw->fast_read_ctr) + if (!sem->read_count) return; - rcu_sync_dtor(&brw->rss); - free_percpu(brw->fast_read_ctr); - brw->fast_read_ctr = NULL; /* catch use after free bugs */ + rcu_sync_dtor(&sem->rss); + free_percpu(sem->read_count); + sem->read_count = NULL; /* catch use after free bugs */ } +EXPORT_SYMBOL_GPL(percpu_free_rwsem); -/* - * This is the fast-path for down_read/up_read. If it succeeds we rely - * on the barriers provided by rcu_sync_enter/exit; see the comments in - * percpu_down_write() and percpu_up_write(). - * - * If this helper fails the callers rely on the normal rw_semaphore and - * atomic_dec_and_test(), so in this case we have the necessary barriers. - */ -static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +int __percpu_down_read(struct percpu_rw_semaphore *sem, int try) { - bool success; + /* + * Due to having preemption disabled the decrement happens on + * the same CPU as the increment, avoiding the + * increment-on-one-CPU-and-decrement-on-another problem. + * + * If the reader misses the writer's assignment of readers_block, then + * the writer is guaranteed to see the reader's increment. + * + * Conversely, any readers that increment their sem->read_count after + * the writer looks are guaranteed to see the readers_block value, + * which in turn means that they are guaranteed to immediately + * decrement their sem->read_count, so that it doesn't matter that the + * writer missed them.
+ */ - preempt_disable(); - success = rcu_sync_is_idle(&brw->rss); - if (likely(success)) - __this_cpu_add(*brw->fast_read_ctr, val); - preempt_enable(); + smp_mb(); /* A matches D */ - return success; -} + /* + * If !readers_block the critical section starts here, matched by the + * release in percpu_up_write(). + */ + if (likely(!smp_load_acquire(&sem->readers_block))) + return 1; -/* - * Like the normal down_read() this is not recursive, the writer can - * come after the first percpu_down_read() and create the deadlock. - * - * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, - * percpu_up_read() does rwsem_release(). This pairs with the usage - * of ->rw_sem in percpu_down/up_write(). - */ -void percpu_down_read(struct percpu_rw_semaphore *brw) -{ - might_sleep(); - rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); + /* + * Per the above comment, we still have preemption disabled and + * will thus decrement on the same CPU as we incremented. + */ + __percpu_up_read(sem); - if (likely(update_fast_ctr(brw, +1))) - return; + if (try) + return 0; - /* Avoid rwsem_acquire_read() and rwsem_release() */ - __down_read(&brw->rw_sem); - atomic_inc(&brw->slow_read_ctr); - __up_read(&brw->rw_sem); -} -EXPORT_SYMBOL_GPL(percpu_down_read); + /* + * We either call schedule() in the wait, or we'll fall through + * and reschedule on the preempt_enable() in percpu_down_read(). + */ + preempt_enable_no_resched(); -int percpu_down_read_trylock(struct percpu_rw_semaphore *brw) -{ - if (unlikely(!update_fast_ctr(brw, +1))) { - if (!__down_read_trylock(&brw->rw_sem)) - return 0; - atomic_inc(&brw->slow_read_ctr); - __up_read(&brw->rw_sem); - } - - rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_); + /* + * Avoid lockdep for the down/up_read(); we already have them. + */ + __down_read(&sem->rw_sem); + this_cpu_inc(*sem->read_count); + __up_read(&sem->rw_sem); + + preempt_disable(); return 1; } +EXPORT_SYMBOL_GPL(__percpu_down_read); -void percpu_up_read(struct percpu_rw_semaphore *brw) +void __percpu_up_read(struct percpu_rw_semaphore *sem) { - rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); - - if (likely(update_fast_ctr(brw, -1))) - return; + smp_mb(); /* B matches C */ + /* + * In other words, if they see our decrement (presumably to aggregate + * zero, as that is the only time it matters) they will also see our + * critical section. + */ + __this_cpu_dec(*sem->read_count); - /* false-positive is possible but harmless */ - if (atomic_dec_and_test(&brw->slow_read_ctr)) - wake_up_all(&brw->write_waitq); + /* Prod writer to recheck readers_active */ + wake_up(&sem->writer); } -EXPORT_SYMBOL_GPL(percpu_up_read); +EXPORT_SYMBOL_GPL(__percpu_up_read); + +#define per_cpu_sum(var) \ +({ \ + typeof(var) __sum = 0; \ + int cpu; \ + compiletime_assert_atomic_type(__sum); \ + for_each_possible_cpu(cpu) \ + __sum += per_cpu(var, cpu); \ + __sum; \ +}) -static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +/* + * Return true if the modular sum of the sem->read_count per-CPU variable is + * zero. If this sum is zero, then it is stable due to the fact that if any + * newly arriving readers increment a given counter, they will immediately + * decrement that same counter. + */ +static bool readers_active_check(struct percpu_rw_semaphore *sem) { - unsigned int sum = 0; - int cpu; + if (per_cpu_sum(*sem->read_count) != 0) + return false; + + /* + * If we observed the decrement, ensure we see the entire critical + * section.
+ */ - for_each_possible_cpu(cpu) { - sum += per_cpu(*brw->fast_read_ctr, cpu); - per_cpu(*brw->fast_read_ctr, cpu) = 0; - } + smp_mb(); /* C matches B */ - return sum; + return true; } -void percpu_down_write(struct percpu_rw_semaphore *brw) +void percpu_down_write(struct percpu_rw_semaphore *sem) { + /* Notify readers to take the slow path. */ + rcu_sync_enter(&sem->rss); + + down_write(&sem->rw_sem); + /* - * Make rcu_sync_is_idle() == F and thus disable the fast-path in - * percpu_down_read() and percpu_up_read(), and wait for gp pass. - * - * The latter synchronises us with the preceding readers which used - * the fast-past, so we can not miss the result of __this_cpu_add() - * or anything else inside their criticial sections. + * Notify new readers to block; up until now, and thus throughout the + * longish rcu_sync_enter() above, new readers could still come in. */ - rcu_sync_enter(&brw->rss); + WRITE_ONCE(sem->readers_block, 1); - /* exclude other writers, and block the new readers completely */ - down_write(&brw->rw_sem); + smp_mb(); /* D matches A */ - /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ - atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + /* + * If they don't see our write of readers_block, then we are + * guaranteed to see their sem->read_count increment, and therefore + * will wait for them. + */ - /* wait for all readers to complete their percpu_up_read() */ - wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); + /* Wait for all now active readers to complete. */ + wait_event(sem->writer, readers_active_check(sem)); } EXPORT_SYMBOL_GPL(percpu_down_write); -void percpu_up_write(struct percpu_rw_semaphore *brw) +void percpu_up_write(struct percpu_rw_semaphore *sem) { - /* release the lock, but the readers can't use the fast-path */ - up_write(&brw->rw_sem); /* - * Enable the fast-path in percpu_down_read() and percpu_up_read() - * but only after another gp pass; this adds the necessary barrier - * to ensure the reader can't miss the changes done by us. + * Signal the writer is done, no fast path yet. + * + * One reason that we cannot just immediately flip to readers_fast is + * that new readers might fail to see the results of this writer's + * critical section. + * + * Therefore we force it through the slow path which guarantees an + * acquire and thereby guarantees the critical section's consistency. + */ + smp_store_release(&sem->readers_block, 0); + + /* + * Release the write lock, this will allow readers back in the game. + */ + up_write(&sem->rw_sem); + + /* + * Once this completes (at least one RCU-sched grace period hence) the + * reader fast path will be available again. Safe to use outside the + * exclusive write lock because it's counting. */ - rcu_sync_exit(&brw->rss); + rcu_sync_exit(&sem->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index be922c9f3d37..e358313a0d6c 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -83,6 +83,18 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) } /** + * Must be called after rcu_sync_init() and before first use. + * + * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}() + * pairs turn into NO-OPs.
+ */ +void rcu_sync_enter_start(struct rcu_sync *rsp) +{ + rsp->gp_count++; + rsp->gp_state = GP_PASSED; +} + +/** * rcu_sync_enter() - Force readers onto slowpath * @rsp: Pointer to rcu_sync structure to use for synchronization * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4b5b0c3cef7a..4cdf967b67c1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7812,6 +7812,7 @@ void __init sched_init_smp(void) hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); update_cluster_topology(); + init_sched_hmp_boost_policy(); init_hrtick(); @@ -7860,6 +7861,7 @@ void __init sched_init(void) BUG_ON(num_possible_cpus() > BITS_PER_LONG); + sched_hmp_parse_dt(); init_clusters(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c index 61e352aeec00..1e7b4cd4e64c 100644 --- a/kernel/sched/hmp.c +++ b/kernel/sched/hmp.c @@ -17,6 +17,7 @@ #include <linux/cpufreq.h> #include <linux/list_sort.h> #include <linux/syscore_ops.h> +#include <linux/of.h> #include "sched.h" #include "core_ctl.h" @@ -225,6 +226,52 @@ fail: return ret; } +/* + * It is possible that CPUs of the same micro-architecture can have slight + * differences in efficiency due to other factors like cache size. The + * BOOST_ON_BIG policy may not be optimal for such systems. The required + * boost policy can be specified via device tree to handle this. + */ +static int __read_mostly sched_boost_policy = SCHED_BOOST_NONE; + +/* + * This should be called after clusters are populated and + * the respective efficiency values are initialized. + */ +void init_sched_hmp_boost_policy(void) +{ + /* + * Initialize the boost type here if it is not passed from + * device tree. + */ + if (sched_boost_policy == SCHED_BOOST_NONE) { + if (max_possible_efficiency != min_possible_efficiency) + sched_boost_policy = SCHED_BOOST_ON_BIG; + else + sched_boost_policy = SCHED_BOOST_ON_ALL; + } +} + +void sched_hmp_parse_dt(void) +{ + struct device_node *sn; + const char *boost_policy; + + if (!sched_enable_hmp) + return; + + sn = of_find_node_by_path("/sched-hmp"); + if (!sn) + return; + + if (!of_property_read_string(sn, "boost-policy", &boost_policy)) { + if (!strcmp(boost_policy, "boost-on-big")) + sched_boost_policy = SCHED_BOOST_ON_BIG; + else if (!strcmp(boost_policy, "boost-on-all")) + sched_boost_policy = SCHED_BOOST_ON_ALL; + } +} + unsigned int max_possible_efficiency = 1; unsigned int min_possible_efficiency = UINT_MAX; @@ -1169,12 +1216,9 @@ int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu, enum sched_boost_type sched_boost_type(void) { - if (sched_boost()) { - if (min_possible_efficiency != max_possible_efficiency) - return SCHED_BOOST_ON_BIG; - else - return SCHED_BOOST_ON_ALL; - } + if (sched_boost()) + return sched_boost_policy; + return SCHED_BOOST_NONE; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1b641e60233e..ada5e580e968 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1399,6 +1399,8 @@ extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css, struct cftype *cft); extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 upmigrate_discourage); +extern void sched_hmp_parse_dt(void); +extern void init_sched_hmp_boost_policy(void); #else /* CONFIG_SCHED_HMP */ @@ -1589,6 +1591,8 @@ static inline void post_big_task_count_change(void) { } static inline void set_hmp_defaults(void) { } static inline void clear_reserved(int cpu) { } +static inline void
sched_hmp_parse_dt(void) {} +static inline void init_sched_hmp_boost_policy(void) {} #define trace_sched_cpu_load(...) #define trace_sched_cpu_load_lb(...) diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c index c70e0466c36c..29d8a26a78ed 100644 --- a/kernel/sched/sched_avg.c +++ b/kernel/sched/sched_avg.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, 2015, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012, 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -60,17 +60,17 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg) spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags); curr_time = sched_clock(); + diff = curr_time - per_cpu(last_time, cpu); + BUG_ON((s64)diff < 0); + tmp_avg += per_cpu(nr_prod_sum, cpu); - tmp_avg += per_cpu(nr, cpu) * - (curr_time - per_cpu(last_time, cpu)); + tmp_avg += per_cpu(nr, cpu) * diff; tmp_big_avg += per_cpu(nr_big_prod_sum, cpu); - tmp_big_avg += nr_eligible_big_tasks(cpu) * - (curr_time - per_cpu(last_time, cpu)); + tmp_big_avg += nr_eligible_big_tasks(cpu) * diff; tmp_iowait += per_cpu(iowait_prod_sum, cpu); - tmp_iowait += nr_iowait_cpu(cpu) * - (curr_time - per_cpu(last_time, cpu)); + tmp_iowait += nr_iowait_cpu(cpu) * diff; per_cpu(last_time, cpu) = curr_time; @@ -107,14 +107,15 @@ EXPORT_SYMBOL(sched_get_nr_running_avg); */ void sched_update_nr_prod(int cpu, long delta, bool inc) { - int diff; - s64 curr_time; + u64 diff; + u64 curr_time; unsigned long flags, nr_running; spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags); nr_running = per_cpu(nr, cpu); curr_time = sched_clock(); diff = curr_time - per_cpu(last_time, cpu); + BUG_ON((s64)diff < 0); per_cpu(last_time, cpu) = curr_time; per_cpu(nr, cpu) = nr_running + (inc ? delta : -delta); diff --git a/net/rmnet_data/rmnet_data_vnd.c b/net/rmnet_data/rmnet_data_vnd.c index 6d6893c7d99d..4e3a205551e0 100644 --- a/net/rmnet_data/rmnet_data_vnd.c +++ b/net/rmnet_data/rmnet_data_vnd.c @@ -928,7 +928,7 @@ int rmnet_vnd_add_tc_flow(uint32_t id, uint32_t map_flow, uint32_t tc_flow) list_add(&(itm->list), &(dev_conf->flow_head)); write_unlock_irqrestore(&dev_conf->flow_map_lock, flags); - LOGD("Created flow mapping [%s][0x%08X][0x%08X]@%p", + LOGD("Created flow mapping [%s][0x%08X][0x%08X]@%pK", dev->name, itm->map_flow_id, itm->tc_flow_id[0], itm); return RMNET_CONFIG_OK;
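
The topology.c and sched/hmp.c hunks above add two related device-tree knobs: an optional "efficiency" property in each cpu node (read by parse_dt_topology(), overriding the table_efficiency[] lookup) and the /sched-hmp boost-policy property (read by sched_hmp_parse_dt()). A minimal sketch of a platform using both follows; the cpu node layout and the value 1024 are illustrative assumptions, not taken from this series:

	cpus {
		#address-cells = <1>;
		#size-cells = <0>;

		cpu@0 {
			device_type = "cpu";
			compatible = "arm,cortex-a53";
			reg = <0x0>;
			clock-frequency = <1900000000>;	/* scaled by efficiency to compute capacity */
			efficiency = <1024>;		/* overrides the table_efficiency[] value */
		};
	};

	sched-hmp {
		boost-policy = "boost-on-all";	/* overrides the efficiency-based default */
	};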
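
The percpu-rwsem rework above changes only the internals (a single per-CPU read_count plus a readers_block flag instead of the old fast/slow counter pair); the locking API itself is unchanged. A minimal caller sketch, assuming ordinary kernel code; my_sem and the functions around it are hypothetical names:

	#include <linux/percpu-rwsem.h>

	static struct percpu_rw_semaphore my_sem;

	static int my_setup(void)
	{
		/* allocates the per-CPU read_count */
		return percpu_init_rwsem(&my_sem);
	}

	static void my_read_side(void)
	{
		/* fast path: per-CPU increment while the rcu_sync is idle */
		percpu_down_read(&my_sem);
		/* read-side critical section */
		percpu_up_read(&my_sem);
	}

	static void my_write_side(void)
	{
		/* forces readers onto the rw_sem slow path, then drains read_count */
		percpu_down_write(&my_sem);
		/* exclusive critical section */
		percpu_up_write(&my_sem);
	}

	static void my_teardown(void)
	{
		percpu_free_rwsem(&my_sem);
	}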
